Index: stable/12/sbin/nvmecontrol/Makefile =================================================================== --- stable/12/sbin/nvmecontrol/Makefile (revision 350945) +++ stable/12/sbin/nvmecontrol/Makefile (revision 350946) @@ -1,17 +1,18 @@ # $FreeBSD$ PACKAGE=runtime PROG= nvmecontrol SRCS= comnd.c nvmecontrol.c -SRCS+= devlist.c firmware.c format.c identify.c logpage.c ns.c perftest.c power.c reset.c +SRCS+= devlist.c firmware.c format.c identify.c logpage.c ns.c nsid.c +SRCS+= perftest.c power.c reset.c #SRCS+= passthru.c SRCS+= identify_ext.c nvme_util.c nc_util.c MAN= nvmecontrol.8 LDFLAGS+= -rdynamic LIBADD+= util SUBDIR= modules .PATH: ${SRCTOP}/sys/dev/nvme .include .include Index: stable/12/sbin/nvmecontrol/devlist.c =================================================================== --- stable/12/sbin/nvmecontrol/devlist.c (revision 350945) +++ stable/12/sbin/nvmecontrol/devlist.c (revision 350946) @@ -1,129 +1,129 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2012-2013 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "nvmecontrol.h" #include "comnd.h" /* Tables for command line parsing */ #define NVME_MAX_UNIT 256 static cmd_fn_t devlist; static struct cmd devlist_cmd = { .name = "devlist", .fn = devlist, .descr = "List NVMe controllers and namespaces" }; CMD_COMMAND(devlist_cmd); /* End of tables for command line parsing */ static inline uint32_t ns_get_sector_size(struct nvme_namespace_data *nsdata) { uint8_t flbas_fmt, lbads; flbas_fmt = (nsdata->flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & NVME_NS_DATA_FLBAS_FORMAT_MASK; lbads = (nsdata->lbaf[flbas_fmt] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) & NVME_NS_DATA_LBAF_LBADS_MASK; return (1 << lbads); } static void devlist(const struct cmd *f, int argc, char *argv[]) { struct nvme_controller_data cdata; struct nvme_namespace_data nsdata; char name[64]; uint8_t mn[64]; uint32_t i; int ctrlr, fd, found, ret; if (arg_parse(argc, argv, f)) return; ctrlr = -1; found = 0; while (ctrlr < NVME_MAX_UNIT) { ctrlr++; sprintf(name, "%s%d", NVME_CTRLR_PREFIX, ctrlr); ret = open_dev(name, &fd, 0, 0); if (ret == EACCES) { warnx("could not open "_PATH_DEV"%s\n", name); continue; } else if (ret != 0) continue; found++; read_controller_data(fd, &cdata); nvme_strvis(mn, cdata.mn, sizeof(mn), NVME_MODEL_NUMBER_LENGTH); printf("%6s: %s\n", name, mn); for (i = 0; i < cdata.nn; i++) { - sprintf(name, "%s%d%s%d", NVME_CTRLR_PREFIX, ctrlr, - NVME_NS_PREFIX, i+1); - read_namespace_data(fd, i+1, &nsdata); + read_namespace_data(fd, i + 1, &nsdata); if (nsdata.nsze == 0) continue; + sprintf(name, "%s%d%s%d", NVME_CTRLR_PREFIX, ctrlr, + NVME_NS_PREFIX, i + 1); printf(" %10s (%lldMB)\n", name, nsdata.nsze * (long long)ns_get_sector_size(&nsdata) / 1024 / 1024); } close(fd); } if (found == 0) printf("No NVMe controllers found.\n"); exit(1); } Index: stable/12/sbin/nvmecontrol/firmware.c =================================================================== --- stable/12/sbin/nvmecontrol/firmware.c (revision 350945) +++ stable/12/sbin/nvmecontrol/firmware.c (revision 350946) @@ -1,357 +1,361 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 EMC Corp. * All rights reserved. * * Copyright (C) 2012-2013 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "nvmecontrol.h" /* Tables for command line parsing */ static cmd_fn_t firmware; #define NONE 0xffffffffu static struct options { bool activate; uint32_t slot; const char *fw_img; const char *dev; } opt = { .activate = false, .slot = NONE, .fw_img = NULL, .dev = NULL, }; static const struct opts firmware_opts[] = { #define OPT(l, s, t, opt, addr, desc) { l, s, t, &opt.addr, desc } OPT("activate", 'a', arg_none, opt, activate, "Attempt to activate firmware"), OPT("slot", 's', arg_uint32, opt, slot, "Slot to activate and/or download firmware to"), OPT("firmware", 'f', arg_path, opt, fw_img, "Firmware image to download"), { NULL, 0, arg_none, NULL, NULL } }; #undef OPT static const struct args firmware_args[] = { { arg_string, &opt.dev, "controller-id" }, { arg_none, NULL, NULL }, }; static struct cmd firmware_cmd = { .name = "firmware", .fn = firmware, .descr = "Download firmware image to controller", .ctx_size = sizeof(opt), .opts = firmware_opts, .args = firmware_args, }; CMD_COMMAND(firmware_cmd); /* End of tables for command line parsing */ static int slot_has_valid_firmware(int fd, int slot) { struct nvme_firmware_page fw; int has_fw = false; read_logpage(fd, NVME_LOG_FIRMWARE_SLOT, NVME_GLOBAL_NAMESPACE_TAG, &fw, sizeof(fw)); if (fw.revision[slot-1] != 0LLU) has_fw = true; return (has_fw); } static void read_image_file(const char *path, void **buf, int32_t *size) { struct stat sb; int32_t filesize; int fd; *size = 0; *buf = NULL; if ((fd = open(path, O_RDONLY)) < 0) err(1, "unable to open '%s'", path); if (fstat(fd, &sb) < 0) err(1, "unable to stat '%s'", path); /* * The NVMe spec does not explicitly state a maximum firmware image * size, although one can be inferred from the dword size limitation * for the size and offset fields in the Firmware Image Download * command. * * Technically, the max is UINT32_MAX * sizeof(uint32_t), since the * size and offsets are specified in terms of dwords (not bytes), but * realistically INT32_MAX is sufficient here and simplifies matters * a bit. */ if (sb.st_size > INT32_MAX) errx(1, "size of file '%s' is too large (%jd bytes)", path, (intmax_t)sb.st_size); filesize = (int32_t)sb.st_size; if ((*buf = malloc(filesize)) == NULL) errx(1, "unable to malloc %d bytes", filesize); if ((*size = read(fd, *buf, filesize)) < 0) err(1, "error reading '%s'", path); /* XXX assuming no short reads */ if (*size != filesize) errx(1, "error reading '%s' (read %d bytes, requested %d bytes)", path, *size, filesize); } static void update_firmware(int fd, uint8_t *payload, int32_t payload_size) { struct nvme_pt_command pt; int32_t off, resid, size; void *chunk; off = 0; resid = payload_size; if ((chunk = aligned_alloc(PAGE_SIZE, NVME_MAX_XFER_SIZE)) == NULL) errx(1, "unable to malloc %d bytes", NVME_MAX_XFER_SIZE); while (resid > 0) { size = (resid >= NVME_MAX_XFER_SIZE) ? NVME_MAX_XFER_SIZE : resid; memcpy(chunk, payload + off, size); memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD; pt.cmd.cdw10 = htole32((size / sizeof(uint32_t)) - 1); pt.cmd.cdw11 = htole32(off / sizeof(uint32_t)); pt.buf = chunk; pt.len = size; pt.is_read = 0; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "firmware download request failed"); if (nvme_completion_is_error(&pt.cpl)) errx(1, "firmware download request returned error"); resid -= size; off += size; } } static int activate_firmware(int fd, int slot, int activate_action) { struct nvme_pt_command pt; uint16_t sct, sc; memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_FIRMWARE_ACTIVATE; pt.cmd.cdw10 = htole32((activate_action << 3) | slot); pt.is_read = 0; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "firmware activate request failed"); sct = NVME_STATUS_GET_SCT(pt.cpl.status); sc = NVME_STATUS_GET_SC(pt.cpl.status); if (sct == NVME_SCT_COMMAND_SPECIFIC && sc == NVME_SC_FIRMWARE_REQUIRES_RESET) return 1; if (nvme_completion_is_error(&pt.cpl)) errx(1, "firmware activate request returned error"); return 0; } static void firmware(const struct cmd *f, int argc, char *argv[]) { int fd = -1; int activate_action, reboot_required; char prompt[64]; void *buf = NULL; - int32_t size = 0; + int32_t size = 0, nsid; uint16_t oacs_fw; uint8_t fw_slot1_ro, fw_num_slots; struct nvme_controller_data cdata; if (arg_parse(argc, argv, f)) return; if (opt.slot == 0) { fprintf(stderr, "0 is not a valid slot number. " "Slot numbers start at 1.\n"); arg_help(argc, argv, f); } else if (opt.slot > 7 && opt.slot != NONE) { fprintf(stderr, "Slot number %s specified which is " "greater than max allowed slot number of " "7.\n", optarg); arg_help(argc, argv, f); } if (!opt.activate && opt.fw_img == NULL) { fprintf(stderr, "Neither a replace ([-f path_to_firmware]) nor " "activate ([-a]) firmware image action\n" "was specified.\n"); arg_help(argc, argv, f); } - /* Check that a controller (and not a namespace) was specified. */ - if (strstr(opt.dev, NVME_NS_PREFIX) != NULL) - arg_help(argc, argv, f); - if (opt.activate && opt.fw_img == NULL && opt.slot == 0) { fprintf(stderr, "Slot number to activate not specified.\n"); arg_help(argc, argv, f); } open_dev(opt.dev, &fd, 1, 1); + + /* Check that a controller (and not a namespace) was specified. */ + get_nsid(fd, NULL, &nsid); + if (nsid != 0) { + close(fd); + arg_help(argc, argv, f); + } + read_controller_data(fd, &cdata); oacs_fw = (cdata.oacs >> NVME_CTRLR_DATA_OACS_FIRMWARE_SHIFT) & NVME_CTRLR_DATA_OACS_FIRMWARE_MASK; if (oacs_fw == 0) errx(1, "controller does not support firmware activate/download"); fw_slot1_ro = (cdata.frmw >> NVME_CTRLR_DATA_FRMW_SLOT1_RO_SHIFT) & NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK; if (opt.fw_img && opt.slot == 1 && fw_slot1_ro) errx(1, "slot %d is marked as read only", opt.slot); fw_num_slots = (cdata.frmw >> NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT) & NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK; if (opt.slot > fw_num_slots) errx(1, "slot %d specified but controller only supports %d slots", opt.slot, fw_num_slots); if (opt.activate && opt.fw_img == NULL && !slot_has_valid_firmware(fd, opt.slot)) errx(1, "slot %d does not contain valid firmware,\n" "try 'nvmecontrol logpage -p 3 %s' to get a list " "of available images\n", opt.slot, opt.dev); if (opt.fw_img) read_image_file(opt.fw_img, &buf, &size); if (opt.fw_img != NULL&& opt.activate) printf("You are about to download and activate " "firmware image (%s) to controller %s.\n" "This may damage your controller and/or " "overwrite an existing firmware image.\n", opt.fw_img, opt.dev); else if (opt.activate) printf("You are about to activate a new firmware " "image on controller %s.\n" "This may damage your controller.\n", opt.dev); else if (opt.fw_img != NULL) printf("You are about to download firmware image " "(%s) to controller %s.\n" "This may damage your controller and/or " "overwrite an existing firmware image.\n", opt.fw_img, opt.dev); printf("Are you sure you want to continue? (yes/no) "); while (1) { fgets(prompt, sizeof(prompt), stdin); if (strncasecmp(prompt, "yes", 3) == 0) break; if (strncasecmp(prompt, "no", 2) == 0) exit(1); printf("Please answer \"yes\" or \"no\". "); } if (opt.fw_img != NULL) { update_firmware(fd, buf, size); if (opt.activate) activate_action = NVME_AA_REPLACE_ACTIVATE; else activate_action = NVME_AA_REPLACE_NO_ACTIVATE; } else { activate_action = NVME_AA_ACTIVATE; } reboot_required = activate_firmware(fd, opt.slot, activate_action); if (opt.activate) { if (reboot_required) { printf("New firmware image activated but requires " "conventional reset (i.e. reboot) to " "complete activation.\n"); } else { printf("New firmware image activated and will take " "effect after next controller reset.\n" "Controller reset can be initiated via " "'nvmecontrol reset %s'\n", opt.dev); } } close(fd); exit(0); } Index: stable/12/sbin/nvmecontrol/format.c =================================================================== --- stable/12/sbin/nvmecontrol/format.c (revision 350945) +++ stable/12/sbin/nvmecontrol/format.c (revision 350946) @@ -1,230 +1,220 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * - * Copyright (C) 2018 Alexander Motin - * All rights reserved. + * Copyright (C) 2018-2019 Alexander Motin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "nvmecontrol.h" #define NONE 0xffffffffu #define SES_NONE 0 #define SES_USER 1 #define SES_CRYPTO 2 /* Tables for command line parsing */ static cmd_fn_t format; static struct options { uint32_t lbaf; uint32_t ms; uint32_t pi; uint32_t pil; uint32_t ses; bool Eflag; bool Cflag; const char *dev; } opt = { .lbaf = NONE, .ms = NONE, .pi = NONE, .pil = NONE, .ses = SES_NONE, .Eflag = false, .Cflag = false, .dev = NULL, }; static const struct opts format_opts[] = { #define OPT(l, s, t, opt, addr, desc) { l, s, t, &opt.addr, desc } OPT("crypto", 'C', arg_none, opt, Cflag, "Crptographic erase"), OPT("erase", 'E', arg_none, opt, Eflag, "User data erase"), OPT("lbaf", 'f', arg_uint32, opt, lbaf, "LBA Format to apply to the media"), OPT("ms", 'm', arg_uint32, opt, ms, "Metadata settings"), OPT("pi", 'p', arg_uint32, opt, pi, "Protective information"), OPT("pil", 'l', arg_uint32, opt, pil, "Protective information location"), OPT("ses", 's', arg_uint32, opt, ses, "Secure erase settings"), { NULL, 0, arg_none, NULL, NULL } }; #undef OPT static const struct args format_args[] = { { arg_string, &opt.dev, "controller-id|namespace-id" }, { arg_none, NULL, NULL }, }; static struct cmd format_cmd = { .name = "format", .fn = format, .descr = "Format/erase one or all the namespaces", .ctx_size = sizeof(opt), .opts = format_opts, .args = format_args, }; CMD_COMMAND(format_cmd); /* End of tables for command line parsing */ static void format(const struct cmd *f, int argc, char *argv[]) { struct nvme_controller_data cd; struct nvme_namespace_data nsd; struct nvme_pt_command pt; - char path[64]; + char *path; const char *target; uint32_t nsid; int lbaf, ms, pi, pil, ses, fd; if (arg_parse(argc, argv, f)) return; if (opt.Eflag || opt.Cflag || opt.ses != SES_NONE) { fprintf(stderr, "Only one of -E, -C or -s may be specified\n"); arg_help(argc, argv, f); } target = opt.dev; lbaf = opt.lbaf; ms = opt.ms; pi = opt.pi; pil = opt.pil; if (opt.Eflag) ses = SES_USER; else if (opt.Cflag) ses = SES_CRYPTO; else ses = opt.ses; - /* - * Check if the specified device node exists before continuing. - * This is a cleaner check for cases where the correct controller - * is specified, but an invalid namespace on that controller. - */ open_dev(target, &fd, 1, 1); - - /* - * If device node contains "ns", we consider it a namespace, - * otherwise, consider it a controller. - */ - if (strstr(target, NVME_NS_PREFIX) == NULL) { + get_nsid(fd, &path, &nsid); + if (nsid == 0) { nsid = NVME_GLOBAL_NAMESPACE_TAG; } else { /* * We send FORMAT commands to the controller, not the namespace, * since it is an admin cmd. The namespace ID will be specified * in the command itself. So parse the namespace's device node * string to get the controller substring and namespace ID. */ close(fd); - parse_ns_str(target, path, &nsid); open_dev(path, &fd, 1, 1); } + free(path); /* Check that controller can execute this command. */ read_controller_data(fd, &cd); if (((cd.oacs >> NVME_CTRLR_DATA_OACS_FORMAT_SHIFT) & NVME_CTRLR_DATA_OACS_FORMAT_MASK) == 0) errx(1, "controller does not support format"); if (((cd.fna >> NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_SHIFT) & NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_MASK) == 0 && ses == SES_CRYPTO) errx(1, "controller does not support cryptographic erase"); if (nsid != NVME_GLOBAL_NAMESPACE_TAG) { if (((cd.fna >> NVME_CTRLR_DATA_FNA_FORMAT_ALL_SHIFT) & NVME_CTRLR_DATA_FNA_FORMAT_ALL_MASK) && ses == SES_NONE) errx(1, "controller does not support per-NS format"); if (((cd.fna >> NVME_CTRLR_DATA_FNA_ERASE_ALL_SHIFT) & NVME_CTRLR_DATA_FNA_ERASE_ALL_MASK) && ses != SES_NONE) errx(1, "controller does not support per-NS erase"); /* Try to keep previous namespace parameters. */ read_namespace_data(fd, nsid, &nsd); if (lbaf < 0) lbaf = (nsd.flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & NVME_NS_DATA_FLBAS_FORMAT_MASK; if (lbaf > nsd.nlbaf) errx(1, "LBA format is out of range"); if (ms < 0) ms = (nsd.flbas >> NVME_NS_DATA_FLBAS_EXTENDED_SHIFT) & NVME_NS_DATA_FLBAS_EXTENDED_MASK; if (pi < 0) pi = (nsd.dps >> NVME_NS_DATA_DPS_MD_START_SHIFT) & NVME_NS_DATA_DPS_MD_START_MASK; if (pil < 0) pil = (nsd.dps >> NVME_NS_DATA_DPS_PIT_SHIFT) & NVME_NS_DATA_DPS_PIT_MASK; } else { /* We have no previous parameters, so default to zeroes. */ if (lbaf < 0) lbaf = 0; if (ms < 0) ms = 0; if (pi < 0) pi = 0; if (pil < 0) pil = 0; } memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_FORMAT_NVM; pt.cmd.nsid = htole32(nsid); pt.cmd.cdw10 = htole32((ses << 9) + (pil << 8) + (pi << 5) + (ms << 4) + lbaf); if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "format request failed"); if (nvme_completion_is_error(&pt.cpl)) errx(1, "format request returned error"); close(fd); exit(0); } Index: stable/12/sbin/nvmecontrol/identify.c =================================================================== --- stable/12/sbin/nvmecontrol/identify.c (revision 350945) +++ stable/12/sbin/nvmecontrol/identify.c (revision 350946) @@ -1,290 +1,278 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2012-2013 Intel Corporation * All rights reserved. + * Copyright (C) 2018-2019 Alexander Motin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "nvmecontrol.h" #include "nvmecontrol_ext.h" +#define NONE 0xfffffffeu + static struct options { bool hex; bool verbose; const char *dev; uint32_t nsid; } opt = { .hex = false, .verbose = false, .dev = NULL, - .nsid = 0, + .nsid = NONE, }; void print_namespace(struct nvme_namespace_data *nsdata) { uint32_t i; uint32_t lbaf, lbads, ms, rp; uint8_t thin_prov, ptype; uint8_t flbas_fmt, t; thin_prov = (nsdata->nsfeat >> NVME_NS_DATA_NSFEAT_THIN_PROV_SHIFT) & NVME_NS_DATA_NSFEAT_THIN_PROV_MASK; flbas_fmt = (nsdata->flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & NVME_NS_DATA_FLBAS_FORMAT_MASK; printf("Size (in LBAs): %lld (%lldM)\n", (long long)nsdata->nsze, (long long)nsdata->nsze / 1024 / 1024); printf("Capacity (in LBAs): %lld (%lldM)\n", (long long)nsdata->ncap, (long long)nsdata->ncap / 1024 / 1024); printf("Utilization (in LBAs): %lld (%lldM)\n", (long long)nsdata->nuse, (long long)nsdata->nuse / 1024 / 1024); printf("Thin Provisioning: %s\n", thin_prov ? "Supported" : "Not Supported"); printf("Number of LBA Formats: %d\n", nsdata->nlbaf+1); printf("Current LBA Format: LBA Format #%02d\n", flbas_fmt); printf("Data Protection Caps: %s%s%s%s%s%s\n", (nsdata->dpc == 0) ? "Not Supported" : "", ((nsdata->dpc >> NVME_NS_DATA_DPC_MD_END_SHIFT) & NVME_NS_DATA_DPC_MD_END_MASK) ? "Last Bytes, " : "", ((nsdata->dpc >> NVME_NS_DATA_DPC_MD_START_SHIFT) & NVME_NS_DATA_DPC_MD_START_MASK) ? "First Bytes, " : "", ((nsdata->dpc >> NVME_NS_DATA_DPC_PIT3_SHIFT) & NVME_NS_DATA_DPC_PIT3_MASK) ? "Type 3, " : "", ((nsdata->dpc >> NVME_NS_DATA_DPC_PIT2_SHIFT) & NVME_NS_DATA_DPC_PIT2_MASK) ? "Type 2, " : "", ((nsdata->dpc >> NVME_NS_DATA_DPC_PIT2_MASK) & NVME_NS_DATA_DPC_PIT1_MASK) ? "Type 1" : ""); printf("Data Protection Settings: "); ptype = (nsdata->dps >> NVME_NS_DATA_DPS_PIT_SHIFT) & NVME_NS_DATA_DPS_PIT_MASK; if (ptype) { printf("Type %d, %s Bytes\n", ptype, ((nsdata->dps >> NVME_NS_DATA_DPS_MD_START_SHIFT) & NVME_NS_DATA_DPS_MD_START_MASK) ? "First" : "Last"); } else { printf("Not Enabled\n"); } printf("Multi-Path I/O Capabilities: %s%s\n", (nsdata->nmic == 0) ? "Not Supported" : "", ((nsdata->nmic >> NVME_NS_DATA_NMIC_MAY_BE_SHARED_SHIFT) & NVME_NS_DATA_NMIC_MAY_BE_SHARED_MASK) ? "May be shared" : ""); printf("Reservation Capabilities: %s%s%s%s%s%s%s%s%s\n", (nsdata->rescap == 0) ? "Not Supported" : "", ((nsdata->rescap >> NVME_NS_DATA_RESCAP_IEKEY13_SHIFT) & NVME_NS_DATA_RESCAP_IEKEY13_MASK) ? "IEKEY13, " : "", ((nsdata->rescap >> NVME_NS_DATA_RESCAP_EX_AC_AR_SHIFT) & NVME_NS_DATA_RESCAP_EX_AC_AR_MASK) ? "EX_AC_AR, " : "", ((nsdata->rescap >> NVME_NS_DATA_RESCAP_WR_EX_AR_SHIFT) & NVME_NS_DATA_RESCAP_WR_EX_AR_MASK) ? "WR_EX_AR, " : "", ((nsdata->rescap >> NVME_NS_DATA_RESCAP_EX_AC_RO_SHIFT) & NVME_NS_DATA_RESCAP_EX_AC_RO_MASK) ? "EX_AC_RO, " : "", ((nsdata->rescap >> NVME_NS_DATA_RESCAP_WR_EX_RO_SHIFT) & NVME_NS_DATA_RESCAP_WR_EX_RO_MASK) ? "WR_EX_RO, " : "", ((nsdata->rescap >> NVME_NS_DATA_RESCAP_EX_AC_SHIFT) & NVME_NS_DATA_RESCAP_EX_AC_MASK) ? "EX_AC, " : "", ((nsdata->rescap >> NVME_NS_DATA_RESCAP_WR_EX_SHIFT) & NVME_NS_DATA_RESCAP_WR_EX_MASK) ? "WR_EX, " : "", ((nsdata->rescap >> NVME_NS_DATA_RESCAP_PTPL_SHIFT) & NVME_NS_DATA_RESCAP_PTPL_MASK) ? "PTPL" : ""); printf("Format Progress Indicator: "); if ((nsdata->fpi >> NVME_NS_DATA_FPI_SUPP_SHIFT) & NVME_NS_DATA_FPI_SUPP_MASK) { printf("%u%% remains\n", (nsdata->fpi >> NVME_NS_DATA_FPI_PERC_SHIFT) & NVME_NS_DATA_FPI_PERC_MASK); } else printf("Not Supported\n"); t = (nsdata->dlfeat >> NVME_NS_DATA_DLFEAT_READ_SHIFT) & NVME_NS_DATA_DLFEAT_READ_MASK; printf("Deallocate Logical Block: Read %s%s%s\n", (t == NVME_NS_DATA_DLFEAT_READ_NR) ? "Not Reported" : (t == NVME_NS_DATA_DLFEAT_READ_00) ? "00h" : (t == NVME_NS_DATA_DLFEAT_READ_FF) ? "FFh" : "Unknown", (nsdata->dlfeat >> NVME_NS_DATA_DLFEAT_DWZ_SHIFT) & NVME_NS_DATA_DLFEAT_DWZ_MASK ? ", Write Zero" : "", (nsdata->dlfeat >> NVME_NS_DATA_DLFEAT_GCRC_SHIFT) & NVME_NS_DATA_DLFEAT_GCRC_MASK ? ", Guard CRC" : ""); printf("Optimal I/O Boundary (LBAs): %u\n", nsdata->noiob); printf("Globally Unique Identifier: "); for (i = 0; i < sizeof(nsdata->nguid); i++) printf("%02x", nsdata->nguid[i]); printf("\n"); printf("IEEE EUI64: "); for (i = 0; i < sizeof(nsdata->eui64); i++) printf("%02x", nsdata->eui64[i]); printf("\n"); for (i = 0; i <= nsdata->nlbaf; i++) { lbaf = nsdata->lbaf[i]; lbads = (lbaf >> NVME_NS_DATA_LBAF_LBADS_SHIFT) & NVME_NS_DATA_LBAF_LBADS_MASK; ms = (lbaf >> NVME_NS_DATA_LBAF_MS_SHIFT) & NVME_NS_DATA_LBAF_MS_MASK; rp = (lbaf >> NVME_NS_DATA_LBAF_RP_SHIFT) & NVME_NS_DATA_LBAF_RP_MASK; printf("LBA Format #%02d: Data Size: %5d Metadata Size: %5d" " Performance: %s\n", i, 1 << lbads, ms, (rp == 0) ? "Best" : (rp == 1) ? "Better" : (rp == 2) ? "Good" : "Degraded"); } } static void -identify_ctrlr(const struct cmd *f, int argc, char *argv[]) +identify_ctrlr(int fd) { struct nvme_controller_data cdata; - int fd, hexlength; + int hexlength; - open_dev(opt.dev, &fd, 1, 1); read_controller_data(fd, &cdata); close(fd); if (opt.hex) { if (opt.verbose) hexlength = sizeof(struct nvme_controller_data); else hexlength = offsetof(struct nvme_controller_data, reserved8); print_hex(&cdata, hexlength); exit(0); } - if (opt.verbose) { - fprintf(stderr, "-v not currently supported without -x\n"); - arg_help(argc, argv, f); - } - nvme_print_controller(&cdata); exit(0); } static void -identify_ns(const struct cmd *f, int argc, char *argv[]) +identify_ns(int fd, uint32_t nsid) { struct nvme_namespace_data nsdata; - char path[64]; - int fd, hexlength; - uint32_t nsid; + int hexlength; - open_dev(opt.dev, &fd, 1, 1); - if (strstr(opt.dev, NVME_NS_PREFIX) != NULL) { - /* - * Now we know that provided device name is valid, that is - * good for error reporting if specified controller name is - * valid, but namespace ID is not. But we send IDENTIFY - * commands to the controller, not the namespace, since it - * is an admin cmd. The namespace ID will be specified in - * the IDENTIFY command itself. So parse the namespace's - * device node string to get the controller device substring - * and namespace ID. - */ - close(fd); - parse_ns_str(opt.dev, path, &nsid); - open_dev(path, &fd, 1, 1); - } else { - nsid = opt.nsid; - } read_namespace_data(fd, nsid, &nsdata); close(fd); if (opt.hex) { if (opt.verbose) hexlength = sizeof(struct nvme_namespace_data); else hexlength = offsetof(struct nvme_namespace_data, reserved6); print_hex(&nsdata, hexlength); exit(0); } - if (opt.verbose) { - fprintf(stderr, "-v not currently supported without -x\n"); - arg_help(argc, argv, f); - } - print_namespace(&nsdata); exit(0); } static void identify(const struct cmd *f, int argc, char *argv[]) { + char *path; + int fd; + uint32_t nsid; + arg_parse(argc, argv, f); - /* - * If device node contains "ns" or nsid is specified, we consider - * it a namespace request, otherwise, consider it a controller. - */ - if (strstr(opt.dev, NVME_NS_PREFIX) == NULL && opt.nsid == 0) - identify_ctrlr(f, argc, argv); + open_dev(opt.dev, &fd, 1, 1); + get_nsid(fd, &path, &nsid); + if (nsid != 0) { + /* + * We got namespace device, but we need to send IDENTIFY + * commands to the controller, not the namespace, since it + * is an admin cmd. The namespace ID will be specified in + * the IDENTIFY command itself. + */ + close(fd); + open_dev(path, &fd, 1, 1); + } + free(path); + if (opt.nsid != NONE) + nsid = opt.nsid; + + if (nsid == 0) + identify_ctrlr(fd); else - identify_ns(f, argc, argv); + identify_ns(fd, nsid); } static const struct opts identify_opts[] = { #define OPT(l, s, t, opt, addr, desc) { l, s, t, &opt.addr, desc } OPT("hex", 'x', arg_none, opt, hex, "Print identiy information in hex"), OPT("verbose", 'v', arg_none, opt, verbose, "More verbosity: print entire identify table"), OPT("nsid", 'n', arg_uint32, opt, nsid, "Namespace ID to use if not in device name"), { NULL, 0, arg_none, NULL, NULL } }; #undef OPT static const struct args identify_args[] = { { arg_string, &opt.dev, "controller-id|namespace-id" }, { arg_none, NULL, NULL }, }; static struct cmd identify_cmd = { .name = "identify", .fn = identify, .descr = "Print summary of the IDENTIFY information", .ctx_size = sizeof(opt), .opts = identify_opts, .args = identify_args, }; CMD_COMMAND(identify_cmd); Index: stable/12/sbin/nvmecontrol/identify_ext.c =================================================================== --- stable/12/sbin/nvmecontrol/identify_ext.c (revision 350945) +++ stable/12/sbin/nvmecontrol/identify_ext.c (revision 350946) @@ -1,246 +1,247 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2012-2013 Intel Corporation * All rights reserved. + * Copyright (C) 2018-2019 Alexander Motin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "nvmecontrol.h" #include "nvmecontrol_ext.h" void nvme_print_controller(struct nvme_controller_data *cdata) { uint8_t str[128]; char cbuf[UINT128_DIG + 1]; uint16_t oncs, oacs; uint8_t compare, write_unc, dsm, t; uint8_t security, fmt, fw, nsmgmt; uint8_t fw_slot1_ro, fw_num_slots; uint8_t ns_smart; uint8_t sqes_max, sqes_min; uint8_t cqes_max, cqes_min; oncs = cdata->oncs; compare = (oncs >> NVME_CTRLR_DATA_ONCS_COMPARE_SHIFT) & NVME_CTRLR_DATA_ONCS_COMPARE_MASK; write_unc = (oncs >> NVME_CTRLR_DATA_ONCS_WRITE_UNC_SHIFT) & NVME_CTRLR_DATA_ONCS_WRITE_UNC_MASK; dsm = (oncs >> NVME_CTRLR_DATA_ONCS_DSM_SHIFT) & NVME_CTRLR_DATA_ONCS_DSM_MASK; oacs = cdata->oacs; security = (oacs >> NVME_CTRLR_DATA_OACS_SECURITY_SHIFT) & NVME_CTRLR_DATA_OACS_SECURITY_MASK; fmt = (oacs >> NVME_CTRLR_DATA_OACS_FORMAT_SHIFT) & NVME_CTRLR_DATA_OACS_FORMAT_MASK; fw = (oacs >> NVME_CTRLR_DATA_OACS_FIRMWARE_SHIFT) & NVME_CTRLR_DATA_OACS_FIRMWARE_MASK; nsmgmt = (oacs >> NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT) & NVME_CTRLR_DATA_OACS_NSMGMT_MASK; fw_num_slots = (cdata->frmw >> NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT) & NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK; fw_slot1_ro = (cdata->frmw >> NVME_CTRLR_DATA_FRMW_SLOT1_RO_SHIFT) & NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK; ns_smart = (cdata->lpa >> NVME_CTRLR_DATA_LPA_NS_SMART_SHIFT) & NVME_CTRLR_DATA_LPA_NS_SMART_MASK; sqes_min = (cdata->sqes >> NVME_CTRLR_DATA_SQES_MIN_SHIFT) & NVME_CTRLR_DATA_SQES_MIN_MASK; sqes_max = (cdata->sqes >> NVME_CTRLR_DATA_SQES_MAX_SHIFT) & NVME_CTRLR_DATA_SQES_MAX_MASK; cqes_min = (cdata->cqes >> NVME_CTRLR_DATA_CQES_MIN_SHIFT) & NVME_CTRLR_DATA_CQES_MIN_MASK; cqes_max = (cdata->cqes >> NVME_CTRLR_DATA_CQES_MAX_SHIFT) & NVME_CTRLR_DATA_CQES_MAX_MASK; printf("Controller Capabilities/Features\n"); printf("================================\n"); printf("Vendor ID: %04x\n", cdata->vid); printf("Subsystem Vendor ID: %04x\n", cdata->ssvid); nvme_strvis(str, cdata->sn, sizeof(str), NVME_SERIAL_NUMBER_LENGTH); printf("Serial Number: %s\n", str); nvme_strvis(str, cdata->mn, sizeof(str), NVME_MODEL_NUMBER_LENGTH); printf("Model Number: %s\n", str); nvme_strvis(str, cdata->fr, sizeof(str), NVME_FIRMWARE_REVISION_LENGTH); printf("Firmware Version: %s\n", str); printf("Recommended Arb Burst: %d\n", cdata->rab); printf("IEEE OUI Identifier: %02x %02x %02x\n", cdata->ieee[0], cdata->ieee[1], cdata->ieee[2]); printf("Multi-Path I/O Capabilities: %s%s%s%s%s\n", (cdata->mic == 0) ? "Not Supported" : "", ((cdata->mic >> NVME_CTRLR_DATA_MIC_ANAR_SHIFT) & NVME_CTRLR_DATA_MIC_SRIOVVF_MASK) ? "Asymmetric, " : "", ((cdata->mic >> NVME_CTRLR_DATA_MIC_SRIOVVF_SHIFT) & NVME_CTRLR_DATA_MIC_SRIOVVF_MASK) ? "SR-IOV VF, " : "", ((cdata->mic >> NVME_CTRLR_DATA_MIC_MCTRLRS_SHIFT) & NVME_CTRLR_DATA_MIC_MCTRLRS_MASK) ? "Multiple controllers, " : "", ((cdata->mic >> NVME_CTRLR_DATA_MIC_MPORTS_SHIFT) & NVME_CTRLR_DATA_MIC_MPORTS_MASK) ? "Multiple ports" : ""); /* TODO: Use CAP.MPSMIN to determine true memory page size. */ printf("Max Data Transfer Size: "); if (cdata->mdts == 0) printf("Unlimited\n"); else printf("%ld\n", PAGE_SIZE * (1L << cdata->mdts)); printf("Controller ID: 0x%04x\n", cdata->ctrlr_id); printf("Version: %d.%d.%d\n", (cdata->ver >> 16) & 0xffff, (cdata->ver >> 8) & 0xff, cdata->ver & 0xff); printf("\n"); printf("Admin Command Set Attributes\n"); printf("============================\n"); printf("Security Send/Receive: %s\n", security ? "Supported" : "Not Supported"); printf("Format NVM: %s\n", fmt ? "Supported" : "Not Supported"); printf("Firmware Activate/Download: %s\n", fw ? "Supported" : "Not Supported"); printf("Namespace Managment: %s\n", nsmgmt ? "Supported" : "Not Supported"); printf("Device Self-test: %sSupported\n", ((oacs >> NVME_CTRLR_DATA_OACS_SELFTEST_SHIFT) & NVME_CTRLR_DATA_OACS_SELFTEST_MASK) ? "" : "Not "); printf("Directives: %sSupported\n", ((oacs >> NVME_CTRLR_DATA_OACS_DIRECTIVES_SHIFT) & NVME_CTRLR_DATA_OACS_DIRECTIVES_MASK) ? "" : "Not "); printf("NVMe-MI Send/Receive: %sSupported\n", ((oacs >> NVME_CTRLR_DATA_OACS_NVMEMI_SHIFT) & NVME_CTRLR_DATA_OACS_NVMEMI_MASK) ? "" : "Not "); printf("Virtualization Management: %sSupported\n", ((oacs >> NVME_CTRLR_DATA_OACS_VM_SHIFT) & NVME_CTRLR_DATA_OACS_VM_MASK) ? "" : "Not "); printf("Doorbell Buffer Config: %sSupported\n", ((oacs >> NVME_CTRLR_DATA_OACS_DBBUFFER_SHIFT) & NVME_CTRLR_DATA_OACS_DBBUFFER_MASK) ? "" : "Not "); printf("Get LBA Status: %sSupported\n", ((oacs >> NVME_CTRLR_DATA_OACS_GETLBA_SHIFT) & NVME_CTRLR_DATA_OACS_GETLBA_MASK) ? "" : "Not "); printf("Sanitize: "); if (cdata->sanicap != 0) { printf("%s%s%s\n", ((cdata->sanicap >> NVME_CTRLR_DATA_SANICAP_CES_SHIFT) & NVME_CTRLR_DATA_SANICAP_CES_SHIFT) ? "crypto, " : "", ((cdata->sanicap >> NVME_CTRLR_DATA_SANICAP_BES_SHIFT) & NVME_CTRLR_DATA_SANICAP_BES_SHIFT) ? "block, " : "", ((cdata->sanicap >> NVME_CTRLR_DATA_SANICAP_OWS_SHIFT) & NVME_CTRLR_DATA_SANICAP_OWS_SHIFT) ? "overwrite" : ""); } else { printf("Not Supported\n"); } printf("Abort Command Limit: %d\n", cdata->acl+1); printf("Async Event Request Limit: %d\n", cdata->aerl+1); printf("Number of Firmware Slots: "); if (fw != 0) printf("%d\n", fw_num_slots); else printf("N/A\n"); printf("Firmware Slot 1 Read-Only: "); if (fw != 0) printf("%s\n", fw_slot1_ro ? "Yes" : "No"); else printf("N/A\n"); printf("Per-Namespace SMART Log: %s\n", ns_smart ? "Yes" : "No"); printf("Error Log Page Entries: %d\n", cdata->elpe+1); printf("Number of Power States: %d\n", cdata->npss+1); printf("\n"); printf("NVM Command Set Attributes\n"); printf("==========================\n"); printf("Submission Queue Entry Size\n"); printf(" Max: %d\n", 1 << sqes_max); printf(" Min: %d\n", 1 << sqes_min); printf("Completion Queue Entry Size\n"); printf(" Max: %d\n", 1 << cqes_max); printf(" Min: %d\n", 1 << cqes_min); printf("Number of Namespaces: %d\n", cdata->nn); printf("Compare Command: %s\n", compare ? "Supported" : "Not Supported"); printf("Write Uncorrectable Command: %s\n", write_unc ? "Supported" : "Not Supported"); printf("Dataset Management Command: %s\n", dsm ? "Supported" : "Not Supported"); printf("Write Zeroes Command: %sSupported\n", ((oncs >> NVME_CTRLR_DATA_ONCS_WRZERO_SHIFT) & NVME_CTRLR_DATA_ONCS_WRZERO_MASK) ? "" : "Not "); printf("Save Features: %sSupported\n", ((oncs >> NVME_CTRLR_DATA_ONCS_SAVEFEAT_SHIFT) & NVME_CTRLR_DATA_ONCS_SAVEFEAT_MASK) ? "" : "Not "); printf("Reservations: %sSupported\n", ((oncs >> NVME_CTRLR_DATA_ONCS_RESERV_SHIFT) & NVME_CTRLR_DATA_ONCS_RESERV_MASK) ? "" : "Not "); printf("Timestamp feature: %sSupported\n", ((oncs >> NVME_CTRLR_DATA_ONCS_TIMESTAMP_SHIFT) & NVME_CTRLR_DATA_ONCS_TIMESTAMP_MASK) ? "" : "Not "); printf("Verify feature: %sSupported\n", ((oncs >> NVME_CTRLR_DATA_ONCS_VERIFY_SHIFT) & NVME_CTRLR_DATA_ONCS_VERIFY_MASK) ? "" : "Not "); printf("Fused Operation Support: %s%s\n", (cdata->fuses == 0) ? "Not Supported" : "", ((cdata->fuses >> NVME_CTRLR_DATA_FUSES_CNW_SHIFT) & NVME_CTRLR_DATA_FUSES_CNW_MASK) ? "Compare and Write" : ""); printf("Format NVM Attributes: %s%s Erase, %s Format\n", ((cdata->fna >> NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_SHIFT) & NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_MASK) ? "Crypto Erase, " : "", ((cdata->fna >> NVME_CTRLR_DATA_FNA_ERASE_ALL_SHIFT) & NVME_CTRLR_DATA_FNA_ERASE_ALL_MASK) ? "All-NVM" : "Per-NS", ((cdata->fna >> NVME_CTRLR_DATA_FNA_FORMAT_ALL_SHIFT) & NVME_CTRLR_DATA_FNA_FORMAT_ALL_MASK) ? "All-NVM" : "Per-NS"); t = (cdata->vwc >> NVME_CTRLR_DATA_VWC_ALL_SHIFT) & NVME_CTRLR_DATA_VWC_ALL_MASK; printf("Volatile Write Cache: %s%s\n", ((cdata->vwc >> NVME_CTRLR_DATA_VWC_PRESENT_SHIFT) & NVME_CTRLR_DATA_VWC_PRESENT_MASK) ? "Present" : "Not Present", (t == NVME_CTRLR_DATA_VWC_ALL_NO) ? ", no flush all" : (t == NVME_CTRLR_DATA_VWC_ALL_YES) ? ", flush all" : ""); if (nsmgmt) { printf("\n"); printf("Namespace Drive Attributes\n"); printf("==========================\n"); printf("NVM total cap: %s\n", uint128_to_str(to128(cdata->untncap.tnvmcap), cbuf, sizeof(cbuf))); printf("NVM unallocated cap: %s\n", uint128_to_str(to128(cdata->untncap.unvmcap), cbuf, sizeof(cbuf))); } } Index: stable/12/sbin/nvmecontrol/logpage.c =================================================================== --- stable/12/sbin/nvmecontrol/logpage.c (revision 350945) +++ stable/12/sbin/nvmecontrol/logpage.c (revision 350946) @@ -1,490 +1,489 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 EMC Corp. * All rights reserved. * * Copyright (C) 2012-2013 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include "nvmecontrol.h" /* Tables for command line parsing */ static cmd_fn_t logpage; #define NONE 0xffffffffu static struct options { bool binary; bool hex; uint32_t page; const char *vendor; const char *dev; } opt = { .binary = false, .hex = false, .page = NONE, .vendor = NULL, .dev = NULL, }; static const struct opts logpage_opts[] = { #define OPT(l, s, t, opt, addr, desc) { l, s, t, &opt.addr, desc } OPT("binary", 'b', arg_none, opt, binary, "Dump the log page as binary"), OPT("hex", 'x', arg_none, opt, hex, "Dump the log page as hex"), OPT("page", 'p', arg_uint32, opt, page, "Page to dump"), OPT("vendor", 'v', arg_string, opt, vendor, "Vendor specific formatting"), { NULL, 0, arg_none, NULL, NULL } }; #undef OPT static const struct args logpage_args[] = { { arg_string, &opt.dev, "" }, { arg_none, NULL, NULL }, }; static struct cmd logpage_cmd = { .name = "logpage", .fn = logpage, .descr = "Print logpages in human-readable form", .ctx_size = sizeof(opt), .opts = logpage_opts, .args = logpage_args, }; CMD_COMMAND(logpage_cmd); /* End of tables for command line parsing */ #define MAX_FW_SLOTS (7) static SLIST_HEAD(,logpage_function) logpages; void logpage_register(struct logpage_function *p) { SLIST_INSERT_HEAD(&logpages, p, link); } const char * kv_lookup(const struct kv_name *kv, size_t kv_count, uint32_t key) { static char bad[32]; size_t i; for (i = 0; i < kv_count; i++, kv++) if (kv->key == key) return kv->name; snprintf(bad, sizeof(bad), "Attribute %#x", key); return bad; } static void print_log_hex(const struct nvme_controller_data *cdata __unused, void *data, uint32_t length) { print_hex(data, length); } static void print_bin(const struct nvme_controller_data *cdata __unused, void *data, uint32_t length) { write(STDOUT_FILENO, data, length); } static void * get_log_buffer(uint32_t size) { void *buf; if ((buf = malloc(size)) == NULL) errx(1, "unable to malloc %u bytes", size); memset(buf, 0, size); return (buf); } void read_logpage(int fd, uint8_t log_page, uint32_t nsid, void *payload, uint32_t payload_size) { struct nvme_pt_command pt; struct nvme_error_information_entry *err_entry; int i, err_pages; memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_GET_LOG_PAGE; pt.cmd.nsid = htole32(nsid); pt.cmd.cdw10 = ((payload_size/sizeof(uint32_t)) - 1) << 16; pt.cmd.cdw10 |= log_page; pt.cmd.cdw10 = htole32(pt.cmd.cdw10); pt.buf = payload; pt.len = payload_size; pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "get log page request failed"); /* Convert data to host endian */ switch (log_page) { case NVME_LOG_ERROR: err_entry = (struct nvme_error_information_entry *)payload; err_pages = payload_size / sizeof(struct nvme_error_information_entry); for (i = 0; i < err_pages; i++) nvme_error_information_entry_swapbytes(err_entry++); break; case NVME_LOG_HEALTH_INFORMATION: nvme_health_information_page_swapbytes( (struct nvme_health_information_page *)payload); break; case NVME_LOG_FIRMWARE_SLOT: nvme_firmware_page_swapbytes( (struct nvme_firmware_page *)payload); break; case INTEL_LOG_TEMP_STATS: intel_log_temp_stats_swapbytes( (struct intel_log_temp_stats *)payload); break; default: break; } if (nvme_completion_is_error(&pt.cpl)) errx(1, "get log page request returned error"); } static void print_log_error(const struct nvme_controller_data *cdata __unused, void *buf, uint32_t size) { int i, nentries; uint16_t status; uint8_t p, sc, sct, m, dnr; struct nvme_error_information_entry *entry = buf; printf("Error Information Log\n"); printf("=====================\n"); if (entry->error_count == 0) { printf("No error entries found\n"); return; } nentries = size/sizeof(struct nvme_error_information_entry); for (i = 0; i < nentries; i++, entry++) { if (entry->error_count == 0) break; status = entry->status; p = NVME_STATUS_GET_P(status); sc = NVME_STATUS_GET_SC(status); sct = NVME_STATUS_GET_SCT(status); m = NVME_STATUS_GET_M(status); dnr = NVME_STATUS_GET_DNR(status); printf("Entry %02d\n", i + 1); printf("=========\n"); printf(" Error count: %ju\n", entry->error_count); printf(" Submission queue ID: %u\n", entry->sqid); printf(" Command ID: %u\n", entry->cid); /* TODO: Export nvme_status_string structures from kernel? */ printf(" Status:\n"); printf(" Phase tag: %d\n", p); printf(" Status code: %d\n", sc); printf(" Status code type: %d\n", sct); printf(" More: %d\n", m); printf(" DNR: %d\n", dnr); printf(" Error location: %u\n", entry->error_location); printf(" LBA: %ju\n", entry->lba); printf(" Namespace ID: %u\n", entry->nsid); printf(" Vendor specific info: %u\n", entry->vendor_specific); } } void print_temp(uint16_t t) { printf("%u K, %2.2f C, %3.2f F\n", t, (float)t - 273.15, (float)t * 9 / 5 - 459.67); } static void print_log_health(const struct nvme_controller_data *cdata __unused, void *buf, uint32_t size __unused) { struct nvme_health_information_page *health = buf; char cbuf[UINT128_DIG + 1]; uint8_t warning; int i; warning = health->critical_warning; printf("SMART/Health Information Log\n"); printf("============================\n"); printf("Critical Warning State: 0x%02x\n", warning); printf(" Available spare: %d\n", !!(warning & NVME_CRIT_WARN_ST_AVAILABLE_SPARE)); printf(" Temperature: %d\n", !!(warning & NVME_CRIT_WARN_ST_TEMPERATURE)); printf(" Device reliability: %d\n", !!(warning & NVME_CRIT_WARN_ST_DEVICE_RELIABILITY)); printf(" Read only: %d\n", !!(warning & NVME_CRIT_WARN_ST_READ_ONLY)); printf(" Volatile memory backup: %d\n", !!(warning & NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP)); printf("Temperature: "); print_temp(health->temperature); printf("Available spare: %u\n", health->available_spare); printf("Available spare threshold: %u\n", health->available_spare_threshold); printf("Percentage used: %u\n", health->percentage_used); printf("Data units (512,000 byte) read: %s\n", uint128_to_str(to128(health->data_units_read), cbuf, sizeof(cbuf))); printf("Data units written: %s\n", uint128_to_str(to128(health->data_units_written), cbuf, sizeof(cbuf))); printf("Host read commands: %s\n", uint128_to_str(to128(health->host_read_commands), cbuf, sizeof(cbuf))); printf("Host write commands: %s\n", uint128_to_str(to128(health->host_write_commands), cbuf, sizeof(cbuf))); printf("Controller busy time (minutes): %s\n", uint128_to_str(to128(health->controller_busy_time), cbuf, sizeof(cbuf))); printf("Power cycles: %s\n", uint128_to_str(to128(health->power_cycles), cbuf, sizeof(cbuf))); printf("Power on hours: %s\n", uint128_to_str(to128(health->power_on_hours), cbuf, sizeof(cbuf))); printf("Unsafe shutdowns: %s\n", uint128_to_str(to128(health->unsafe_shutdowns), cbuf, sizeof(cbuf))); printf("Media errors: %s\n", uint128_to_str(to128(health->media_errors), cbuf, sizeof(cbuf))); printf("No. error info log entries: %s\n", uint128_to_str(to128(health->num_error_info_log_entries), cbuf, sizeof(cbuf))); printf("Warning Temp Composite Time: %d\n", health->warning_temp_time); printf("Error Temp Composite Time: %d\n", health->error_temp_time); for (i = 0; i < 8; i++) { if (health->temp_sensor[i] == 0) continue; printf("Temperature Sensor %d: ", i + 1); print_temp(health->temp_sensor[i]); } } static void print_log_firmware(const struct nvme_controller_data *cdata, void *buf, uint32_t size __unused) { int i, slots; const char *status; struct nvme_firmware_page *fw = buf; uint8_t afi_slot; uint16_t oacs_fw; uint8_t fw_num_slots; afi_slot = fw->afi >> NVME_FIRMWARE_PAGE_AFI_SLOT_SHIFT; afi_slot &= NVME_FIRMWARE_PAGE_AFI_SLOT_MASK; oacs_fw = (cdata->oacs >> NVME_CTRLR_DATA_OACS_FIRMWARE_SHIFT) & NVME_CTRLR_DATA_OACS_FIRMWARE_MASK; fw_num_slots = (cdata->frmw >> NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT) & NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK; printf("Firmware Slot Log\n"); printf("=================\n"); if (oacs_fw == 0) slots = 1; else slots = MIN(fw_num_slots, MAX_FW_SLOTS); for (i = 0; i < slots; i++) { printf("Slot %d: ", i + 1); if (afi_slot == i + 1) status = " Active"; else status = "Inactive"; if (fw->revision[i] == 0LLU) printf("Empty\n"); else if (isprint(*(char *)&fw->revision[i])) printf("[%s] %.8s\n", status, (char *)&fw->revision[i]); else printf("[%s] %016jx\n", status, fw->revision[i]); } } /* * Table of log page printer / sizing. * * Make sure you keep all the pages of one vendor together so -v help * lists all the vendors pages. */ NVME_LOGPAGE(error, NVME_LOG_ERROR, NULL, "Drive Error Log", print_log_error, 0); NVME_LOGPAGE(health, NVME_LOG_HEALTH_INFORMATION, NULL, "Health/SMART Data", print_log_health, sizeof(struct nvme_health_information_page)); NVME_LOGPAGE(fw, NVME_LOG_FIRMWARE_SLOT, NULL, "Firmware Information", print_log_firmware, sizeof(struct nvme_firmware_page)); static void logpage_help(void) { const struct logpage_function *f; const char *v; fprintf(stderr, "\n"); fprintf(stderr, "%-8s %-10s %s\n", "Page", "Vendor","Page Name"); fprintf(stderr, "-------- ---------- ----------\n"); SLIST_FOREACH(f, &logpages, link) { v = f->vendor == NULL ? "-" : f->vendor; fprintf(stderr, "0x%02x %-10s %s\n", f->log_page, v, f->name); } exit(1); } static void logpage(const struct cmd *f, int argc, char *argv[]) { int fd; - bool ns_specified; - char cname[64]; + char *path; uint32_t nsid, size; void *buf; const struct logpage_function *lpf; struct nvme_controller_data cdata; print_fn_t print_fn; uint8_t ns_smart; if (arg_parse(argc, argv, f)) return; if (opt.hex && opt.binary) { fprintf(stderr, "Can't specify both binary and hex\n"); arg_help(argc, argv, f); } if (opt.vendor != NULL && strcmp(opt.vendor, "help") == 0) logpage_help(); if (opt.page == NONE) { fprintf(stderr, "Missing page_id (-p).\n"); arg_help(argc, argv, f); } - if (strstr(opt.dev, NVME_NS_PREFIX) != NULL) { - ns_specified = true; - parse_ns_str(opt.dev, cname, &nsid); - open_dev(cname, &fd, 1, 1); - } else { - ns_specified = false; + open_dev(opt.dev, &fd, 1, 1); + get_nsid(fd, &path, &nsid); + if (nsid == 0) { nsid = NVME_GLOBAL_NAMESPACE_TAG; - open_dev(opt.dev, &fd, 1, 1); + } else { + close(fd); + open_dev(path, &fd, 1, 1); } + free(path); read_controller_data(fd, &cdata); ns_smart = (cdata.lpa >> NVME_CTRLR_DATA_LPA_NS_SMART_SHIFT) & NVME_CTRLR_DATA_LPA_NS_SMART_MASK; /* * The log page attribtues indicate whether or not the controller * supports the SMART/Health information log page on a per * namespace basis. */ - if (ns_specified) { + if (nsid != NVME_GLOBAL_NAMESPACE_TAG) { if (opt.page != NVME_LOG_HEALTH_INFORMATION) errx(1, "log page %d valid only at controller level", opt.page); if (ns_smart == 0) errx(1, "controller does not support per namespace " "smart/health information"); } print_fn = print_log_hex; size = DEFAULT_SIZE; if (opt.binary) print_fn = print_bin; if (!opt.binary && !opt.hex) { /* * See if there is a pretty print function for the specified log * page. If one isn't found, we just revert to the default * (print_hex). If there was a vendor specified by the user, and * the page is vendor specific, don't match the print function * unless the vendors match. */ SLIST_FOREACH(lpf, &logpages, link) { if (lpf->vendor != NULL && opt.vendor != NULL && strcmp(lpf->vendor, opt.vendor) != 0) continue; if (opt.page != lpf->log_page) continue; print_fn = lpf->print_fn; size = lpf->size; break; } } if (opt.page == NVME_LOG_ERROR) { size = sizeof(struct nvme_error_information_entry); size *= (cdata.elpe + 1); } /* Read the log page */ buf = get_log_buffer(size); read_logpage(fd, opt.page, nsid, buf, size); print_fn(&cdata, buf, size); close(fd); exit(0); } Index: stable/12/sbin/nvmecontrol/ns.c =================================================================== --- stable/12/sbin/nvmecontrol/ns.c (revision 350945) +++ stable/12/sbin/nvmecontrol/ns.c (revision 350946) @@ -1,884 +1,884 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017 Netflix, Inc - * Copyright (C) 2018 Alexander Motin + * Copyright (C) 2018-2019 Alexander Motin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "nvmecontrol.h" /* Tables for command line parsing */ static cmd_fn_t ns; static cmd_fn_t nsactive; static cmd_fn_t nsallocated; static cmd_fn_t nscontrollers; static cmd_fn_t nscreate; static cmd_fn_t nsdelete; static cmd_fn_t nsattach; static cmd_fn_t nsdetach; static cmd_fn_t nsattached; static cmd_fn_t nsidentify; #define NONE 0xffffffffu #define NONE64 0xffffffffffffffffull #define OPT(l, s, t, opt, addr, desc) { l, s, t, &opt.addr, desc } #define OPT_END { NULL, 0, arg_none, NULL, NULL } static struct cmd ns_cmd = { .name = "ns", .fn = ns, .descr = "Namespace management commands", .ctx_size = 0, .opts = NULL, .args = NULL, }; CMD_COMMAND(ns_cmd); static struct active_options { const char *dev; } active_opt = { .dev = NULL, }; static const struct args active_args[] = { { arg_string, &active_opt.dev, "controller-id" }, { arg_none, NULL, NULL }, }; static struct cmd active_cmd = { .name = "active", .fn = nsactive, .descr = "List active (attached) namespaces", .ctx_size = sizeof(active_opt), .opts = NULL, .args = active_args, }; CMD_SUBCOMMAND(ns_cmd, active_cmd); static struct cmd allocated_cmd = { .name = "allocated", .fn = nsallocated, .descr = "List allocated (created) namespaces", .ctx_size = sizeof(active_opt), .opts = NULL, .args = active_args, }; CMD_SUBCOMMAND(ns_cmd, allocated_cmd); static struct controllers_options { const char *dev; } controllers_opt = { .dev = NULL, }; static const struct args controllers_args[] = { { arg_string, &controllers_opt.dev, "controller-id" }, { arg_none, NULL, NULL }, }; static struct cmd controllers_cmd = { .name = "controllers", .fn = nscontrollers, .descr = "List all controllers in NVM subsystem", .ctx_size = sizeof(controllers_opt), .opts = NULL, .args = controllers_args, }; CMD_SUBCOMMAND(ns_cmd, controllers_cmd); static struct create_options { uint64_t nsze; uint64_t cap; uint32_t lbaf; uint32_t mset; uint32_t nmic; uint32_t pi; uint32_t pil; uint32_t flbas; uint32_t dps; // uint32_t block_size; const char *dev; } create_opt = { .nsze = NONE64, .cap = NONE64, .lbaf = NONE, .mset = NONE, .nmic = NONE, .pi = NONE, .pil = NONE, .flbas = NONE, .dps = NONE, .dev = NULL, // .block_size = NONE, }; static const struct opts create_opts[] = { OPT("nsze", 's', arg_uint64, create_opt, nsze, "The namespace size"), OPT("ncap", 'c', arg_uint64, create_opt, cap, "The capacity of the namespace (<= ns size)"), OPT("lbaf", 'f', arg_uint32, create_opt, lbaf, "The FMT field of the FLBAS"), OPT("mset", 'm', arg_uint32, create_opt, mset, "The MSET field of the FLBAS"), OPT("nmic", 'n', arg_uint32, create_opt, nmic, "Namespace multipath and sharing capabilities"), OPT("pi", 'p', arg_uint32, create_opt, pi, "PI field of FLBAS"), OPT("pil", 'l', arg_uint32, create_opt, pil, "PIL field of FLBAS"), OPT("flbas", 'L', arg_uint32, create_opt, flbas, "Namespace formatted logical block size setting"), OPT("dps", 'd', arg_uint32, create_opt, dps, "Data protection settings"), // OPT("block-size", 'b', arg_uint32, create_opt, block_size, // "Blocksize of the namespace"), OPT_END }; static const struct args create_args[] = { { arg_string, &create_opt.dev, "controller-id" }, { arg_none, NULL, NULL }, }; static struct cmd create_cmd = { .name = "create", .fn = nscreate, .descr = "Create a namespace", .ctx_size = sizeof(create_opt), .opts = create_opts, .args = create_args, }; CMD_SUBCOMMAND(ns_cmd, create_cmd); static struct delete_options { uint32_t nsid; const char *dev; } delete_opt = { .nsid = NONE, .dev = NULL, }; static const struct opts delete_opts[] = { OPT("namespace-id", 'n', arg_uint32, delete_opt, nsid, "The namespace ID to delete"), OPT_END }; static const struct args delete_args[] = { { arg_string, &delete_opt.dev, "controller-id" }, { arg_none, NULL, NULL }, }; static struct cmd delete_cmd = { .name = "delete", .fn = nsdelete, .descr = "Delete a namespace", .ctx_size = sizeof(delete_opt), .opts = delete_opts, .args = delete_args, }; CMD_SUBCOMMAND(ns_cmd, delete_cmd); static struct attach_options { uint32_t nsid; uint32_t ctrlrid; const char *dev; } attach_opt = { .nsid = NONE, .ctrlrid = NONE - 1, .dev = NULL, }; static const struct opts attach_opts[] = { OPT("namespace-id", 'n', arg_uint32, attach_opt, nsid, "The namespace ID to attach"), OPT("controller", 'c', arg_uint32, attach_opt, ctrlrid, "The controller ID to attach"), OPT_END }; static const struct args attach_args[] = { { arg_string, &attach_opt.dev, "controller-id" }, { arg_none, NULL, NULL }, }; static struct cmd attach_cmd = { .name = "attach", .fn = nsattach, .descr = "Attach a controller to a namespace", .ctx_size = sizeof(attach_opt), .opts = attach_opts, .args = attach_args, }; CMD_SUBCOMMAND(ns_cmd, attach_cmd); static struct attached_options { uint32_t nsid; const char *dev; } attached_opt = { .nsid = NONE, .dev = NULL, }; static const struct opts attached_opts[] = { OPT("namespace-id", 'n', arg_uint32, attached_opt, nsid, "The namespace ID to request attached controllers"), OPT_END }; static const struct args attached_args[] = { { arg_string, &attached_opt.dev, "controller-id" }, { arg_none, NULL, NULL }, }; static struct cmd attached_cmd = { .name = "attached", .fn = nsattached, .descr = "List controllers attached to a namespace", .ctx_size = sizeof(attached_opt), .opts = attached_opts, .args = attached_args, }; CMD_SUBCOMMAND(ns_cmd, attached_cmd); static struct detach_options { uint32_t nsid; uint32_t ctrlrid; const char *dev; } detach_opt = { .nsid = NONE, .ctrlrid = NONE - 1, .dev = NULL, }; static const struct opts detach_opts[] = { OPT("namespace-id", 'n', arg_uint32, detach_opt, nsid, "The namespace ID to detach"), OPT("controller", 'c', arg_uint32, detach_opt, ctrlrid, "The controller ID to detach"), OPT_END }; static const struct args detach_args[] = { { arg_string, &detach_opt.dev, "controller-id" }, { arg_none, NULL, NULL }, }; static struct cmd detach_cmd = { .name = "detach", .fn = nsdetach, .descr = "Detach a controller from a namespace", .ctx_size = sizeof(detach_opt), .opts = detach_opts, .args = detach_args, }; CMD_SUBCOMMAND(ns_cmd, detach_cmd); static struct identify_options { bool hex; bool verbose; const char *dev; uint32_t nsid; } identify_opt = { .hex = false, .verbose = false, .dev = NULL, .nsid = NONE, }; static const struct opts identify_opts[] = { OPT("hex", 'x', arg_none, identify_opt, hex, "Print identiy information in hex"), OPT("verbose", 'v', arg_none, identify_opt, verbose, "More verbosity: print entire identify table"), OPT("nsid", 'n', arg_uint32, identify_opt, nsid, "The namespace ID to print IDENTIFY for"), { NULL, 0, arg_none, NULL, NULL } }; static const struct args identify_args[] = { { arg_string, &identify_opt.dev, "controller-id" }, { arg_none, NULL, NULL }, }; static struct cmd identify_cmd = { .name = "identify", .fn = nsidentify, .descr = "Print IDENTIFY for allocated namespace", .ctx_size = sizeof(identify_opt), .opts = identify_opts, .args = identify_args, }; CMD_SUBCOMMAND(ns_cmd, identify_cmd); /* handles NVME_OPC_NAMESPACE_MANAGEMENT and ATTACHMENT admin cmds */ struct ns_result_str { uint16_t res; const char * str; }; static struct ns_result_str ns_result[] = { { 0x2, "Invalid Field"}, { 0xa, "Invalid Format"}, { 0xb, "Invalid Namespace or format"}, { 0x15, "Namespace insufficent capacity"}, { 0x16, "Namespace ID unavaliable"}, { 0x18, "Namespace already attached"}, { 0x19, "Namespace is private"}, { 0x1a, "Namespace is not attached"}, { 0x1b, "Thin provisioning not supported"}, { 0x1c, "Controller list invalid"}, { 0x24, "ANA Group Identifier Invalid"}, { 0x25, "ANA Attach Failed"}, { 0xFFFF, "Unknown"} }; static const char * get_res_str(uint16_t res) { struct ns_result_str *t = ns_result; while (t->res != 0xFFFF) { if (t->res == res) return (t->str); t++; } return t->str; } static void nsactive(const struct cmd *f, int argc, char *argv[]) { struct nvme_pt_command pt; int fd, i; uint32_t list[1024]; if (arg_parse(argc, argv, f)) return; open_dev(active_opt.dev, &fd, 1, 1); memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_IDENTIFY; pt.cmd.nsid = htole32(0); pt.cmd.cdw10 = htole32(0x02); pt.buf = list; pt.len = sizeof(list); pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "identify request failed"); if (nvme_completion_is_error(&pt.cpl)) errx(1, "identify request returned error"); printf("Active namespaces:\n"); for (i = 0; list[i] != 0; i++) printf("%10d\n", le32toh(list[i])); exit(0); } static void nsallocated(const struct cmd *f, int argc, char *argv[]) { struct nvme_pt_command pt; struct nvme_controller_data cd; int fd, i; uint32_t list[1024]; if (arg_parse(argc, argv, f)) return; open_dev(active_opt.dev, &fd, 1, 1); read_controller_data(fd, &cd); /* Check that controller can execute this command. */ if (((cd.oacs >> NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT) & NVME_CTRLR_DATA_OACS_NSMGMT_MASK) == 0) errx(1, "controller does not support namespace management"); memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_IDENTIFY; pt.cmd.nsid = htole32(0); pt.cmd.cdw10 = htole32(0x10); pt.buf = list; pt.len = sizeof(list); pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "identify request failed"); if (nvme_completion_is_error(&pt.cpl)) errx(1, "identify request returned error"); printf("Allocated namespaces:\n"); for (i = 0; list[i] != 0; i++) printf("%10d\n", le32toh(list[i])); exit(0); } static void nscontrollers(const struct cmd *f, int argc, char *argv[]) { struct nvme_pt_command pt; struct nvme_controller_data cd; int fd, i, n; uint16_t clist[2048]; if (arg_parse(argc, argv, f)) return; open_dev(controllers_opt.dev, &fd, 1, 1); read_controller_data(fd, &cd); /* Check that controller can execute this command. */ if (((cd.oacs >> NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT) & NVME_CTRLR_DATA_OACS_NSMGMT_MASK) == 0) errx(1, "controller does not support namespace management"); memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_IDENTIFY; pt.cmd.cdw10 = htole32(0x13); pt.buf = clist; pt.len = sizeof(clist); pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "identify request failed"); if (nvme_completion_is_error(&pt.cpl)) errx(1, "identify request returned error"); n = le16toh(clist[0]); printf("NVM subsystem includes %d controller(s):\n", n); for (i = 0; i < n; i++) printf(" 0x%04x\n", le16toh(clist[i + 1])); exit(0); } /* * NS MGMT Command specific status values: * 0xa = Invalid Format * 0x15 = Namespace Insuffience capacity * 0x16 = Namespace ID unavailable (number namespaces exceeded) * 0xb = Thin Provisioning Not supported */ static void nscreate(const struct cmd *f, int argc, char *argv[]) { struct nvme_pt_command pt; struct nvme_controller_data cd; struct nvme_namespace_data nsdata; int fd, result; if (arg_parse(argc, argv, f)) return; if (create_opt.cap == NONE64) create_opt.cap = create_opt.nsze; if (create_opt.nsze == NONE64) { fprintf(stderr, "Size not specified\n"); arg_help(argc, argv, f); } open_dev(create_opt.dev, &fd, 1, 1); read_controller_data(fd, &cd); /* Check that controller can execute this command. */ if (((cd.oacs >> NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT) & NVME_CTRLR_DATA_OACS_NSMGMT_MASK) == 0) errx(1, "controller does not support namespace management"); /* Allow namespaces sharing if Multi-Path I/O is supported. */ if (create_opt.nmic == NONE) { create_opt.nmic = cd.mic ? (NVME_NS_DATA_NMIC_MAY_BE_SHARED_MASK << NVME_NS_DATA_NMIC_MAY_BE_SHARED_SHIFT) : 0; } memset(&nsdata, 0, sizeof(nsdata)); nsdata.nsze = create_opt.nsze; nsdata.ncap = create_opt.cap; if (create_opt.flbas == NONE) nsdata.flbas = ((create_opt.lbaf & NVME_NS_DATA_FLBAS_FORMAT_MASK) << NVME_NS_DATA_FLBAS_FORMAT_SHIFT) | ((create_opt.mset & NVME_NS_DATA_FLBAS_EXTENDED_MASK) << NVME_NS_DATA_FLBAS_EXTENDED_SHIFT); else nsdata.flbas = create_opt.flbas; if (create_opt.dps == NONE) nsdata.dps = ((create_opt.pi & NVME_NS_DATA_DPS_MD_START_MASK) << NVME_NS_DATA_DPS_MD_START_SHIFT) | ((create_opt.pil & NVME_NS_DATA_DPS_PIT_MASK) << NVME_NS_DATA_DPS_PIT_SHIFT); else nsdata.dps = create_opt.dps; nsdata.nmic = create_opt.nmic; nvme_namespace_data_swapbytes(&nsdata); memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_NAMESPACE_MANAGEMENT; pt.cmd.cdw10 = htole32(0); /* create */ pt.buf = &nsdata; pt.len = sizeof(struct nvme_namespace_data); pt.is_read = 0; /* passthrough writes data to ctrlr */ if ((result = ioctl(fd, NVME_PASSTHROUGH_CMD, &pt)) < 0) errx(1, "ioctl request to %s failed: %d", argv[optind], result); if (nvme_completion_is_error(&pt.cpl)) { errx(1, "namespace creation failed: %s", get_res_str((pt.cpl.status >> NVME_STATUS_SC_SHIFT) & NVME_STATUS_SC_MASK)); } printf("namespace %d created\n", pt.cpl.cdw0); exit(0); } static void nsdelete(const struct cmd *f, int argc, char *argv[]) { struct nvme_pt_command pt; struct nvme_controller_data cd; int fd, result; char buf[2]; if (arg_parse(argc, argv, f)) return; if (delete_opt.nsid == NONE) { fprintf(stderr, "No NSID specified"); arg_help(argc, argv, f); } open_dev(delete_opt.dev, &fd, 1, 1); read_controller_data(fd, &cd); /* Check that controller can execute this command. */ if (((cd.oacs >> NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT) & NVME_CTRLR_DATA_OACS_NSMGMT_MASK) == 0) errx(1, "controller does not support namespace management"); memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_NAMESPACE_MANAGEMENT; pt.cmd.cdw10 = htole32(1); /* delete */ pt.buf = buf; pt.len = sizeof(buf); pt.is_read = 1; pt.cmd.nsid = delete_opt.nsid; if ((result = ioctl(fd, NVME_PASSTHROUGH_CMD, &pt)) < 0) errx(1, "ioctl request to %s failed: %d", delete_opt.dev, result); if (nvme_completion_is_error(&pt.cpl)) { errx(1, "namespace deletion failed: %s", get_res_str((pt.cpl.status >> NVME_STATUS_SC_SHIFT) & NVME_STATUS_SC_MASK)); } printf("namespace %d deleted\n", delete_opt.nsid); exit(0); } /* * Attach and Detach use Dword 10, and a controller list (section 4.9) * This struct is 4096 bytes in size. * 0h = attach * 1h = detach * * Result values for both attach/detach: * * Completion 18h = Already attached * 19h = NS is private and already attached to a controller * 1Ah = Not attached, request could not be completed * 1Ch = Controller list invalid. * * 0x2 Invalid Field can occur if ctrlrid d.n.e in system. */ static void nsattach(const struct cmd *f, int argc, char *argv[]) { struct nvme_pt_command pt; struct nvme_controller_data cd; int fd, result; uint16_t clist[2048]; if (arg_parse(argc, argv, f)) return; if (attach_opt.nsid == NONE) { fprintf(stderr, "No valid NSID specified\n"); arg_help(argc, argv, f); } open_dev(attach_opt.dev, &fd, 1, 1); read_controller_data(fd, &cd); /* Check that controller can execute this command. */ if (((cd.oacs >> NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT) & NVME_CTRLR_DATA_OACS_NSMGMT_MASK) == 0) errx(1, "controller does not support namespace management"); if (attach_opt.ctrlrid == NONE) { /* Get full list of controllers to attach to. */ memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_IDENTIFY; pt.cmd.cdw10 = htole32(0x13); pt.buf = clist; pt.len = sizeof(clist); pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "identify request failed"); if (nvme_completion_is_error(&pt.cpl)) errx(1, "identify request returned error"); } else { /* By default attach to this controller. */ if (attach_opt.ctrlrid == NONE - 1) attach_opt.ctrlrid = cd.ctrlr_id; memset(&clist, 0, sizeof(clist)); clist[0] = htole16(1); clist[1] = htole16(attach_opt.ctrlrid); } memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_NAMESPACE_ATTACHMENT; pt.cmd.cdw10 = htole32(0); /* attach */ pt.cmd.nsid = attach_opt.nsid; pt.buf = &clist; pt.len = sizeof(clist); if ((result = ioctl(fd, NVME_PASSTHROUGH_CMD, &pt)) < 0) errx(1, "ioctl request to %s failed: %d", attach_opt.dev, result); if (nvme_completion_is_error(&pt.cpl)) { errx(1, "namespace attach failed: %s", get_res_str((pt.cpl.status >> NVME_STATUS_SC_SHIFT) & NVME_STATUS_SC_MASK)); } printf("namespace %d attached\n", attach_opt.nsid); exit(0); } static void nsdetach(const struct cmd *f, int argc, char *argv[]) { struct nvme_pt_command pt; struct nvme_controller_data cd; int fd, result; uint16_t clist[2048]; if (arg_parse(argc, argv, f)) return; if (detach_opt.nsid == NONE) { fprintf(stderr, "No valid NSID specified\n"); arg_help(argc, argv, f); } open_dev(detach_opt.dev, &fd, 1, 1); read_controller_data(fd, &cd); /* Check that controller can execute this command. */ if (((cd.oacs >> NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT) & NVME_CTRLR_DATA_OACS_NSMGMT_MASK) == 0) errx(1, "controller does not support namespace management"); if (detach_opt.ctrlrid == NONE) { /* Get list of controllers this namespace attached to. */ memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_IDENTIFY; pt.cmd.nsid = htole32(detach_opt.nsid); pt.cmd.cdw10 = htole32(0x12); pt.buf = clist; pt.len = sizeof(clist); pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "identify request failed"); if (nvme_completion_is_error(&pt.cpl)) errx(1, "identify request returned error"); if (clist[0] == 0) { detach_opt.ctrlrid = cd.ctrlr_id; memset(&clist, 0, sizeof(clist)); clist[0] = htole16(1); clist[1] = htole16(detach_opt.ctrlrid); } } else { /* By default detach from this controller. */ if (detach_opt.ctrlrid == NONE - 1) detach_opt.ctrlrid = cd.ctrlr_id; memset(&clist, 0, sizeof(clist)); clist[0] = htole16(1); clist[1] = htole16(detach_opt.ctrlrid); } memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_NAMESPACE_ATTACHMENT; pt.cmd.cdw10 = htole32(1); /* detach */ pt.cmd.nsid = detach_opt.nsid; pt.buf = &clist; pt.len = sizeof(clist); if ((result = ioctl(fd, NVME_PASSTHROUGH_CMD, &pt)) < 0) errx(1, "ioctl request to %s failed: %d", argv[optind], result); if (nvme_completion_is_error(&pt.cpl)) { errx(1, "namespace detach failed: %s", get_res_str((pt.cpl.status >> NVME_STATUS_SC_SHIFT) & NVME_STATUS_SC_MASK)); } printf("namespace %d detached\n", detach_opt.nsid); exit(0); } static void nsattached(const struct cmd *f, int argc, char *argv[]) { struct nvme_pt_command pt; struct nvme_controller_data cd; int fd, i, n; uint16_t clist[2048]; if (arg_parse(argc, argv, f)) return; if (attached_opt.nsid == NONE) { fprintf(stderr, "No valid NSID specified\n"); arg_help(argc, argv, f); } open_dev(attached_opt.dev, &fd, 1, 1); read_controller_data(fd, &cd); /* Check that controller can execute this command. */ if (((cd.oacs >> NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT) & NVME_CTRLR_DATA_OACS_NSMGMT_MASK) == 0) errx(1, "controller does not support namespace management"); memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_IDENTIFY; pt.cmd.nsid = htole32(attached_opt.nsid); pt.cmd.cdw10 = htole32(0x12); pt.buf = clist; pt.len = sizeof(clist); pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "identify request failed"); if (nvme_completion_is_error(&pt.cpl)) errx(1, "identify request returned error"); n = le16toh(clist[0]); printf("Attached %d controller(s):\n", n); for (i = 0; i < n; i++) printf(" 0x%04x\n", le16toh(clist[i + 1])); exit(0); } static void nsidentify(const struct cmd *f, int argc, char *argv[]) { struct nvme_pt_command pt; struct nvme_controller_data cd; struct nvme_namespace_data nsdata; uint8_t *data; int fd; u_int i; if (arg_parse(argc, argv, f)) return; if (identify_opt.nsid == NONE) { fprintf(stderr, "No valid NSID specified\n"); arg_help(argc, argv, f); } open_dev(identify_opt.dev, &fd, 1, 1); read_controller_data(fd, &cd); /* Check that controller can execute this command. */ if (((cd.oacs >> NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT) & NVME_CTRLR_DATA_OACS_NSMGMT_MASK) == 0) errx(1, "controller does not support namespace management"); memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_IDENTIFY; pt.cmd.nsid = htole32(identify_opt.nsid); pt.cmd.cdw10 = htole32(0x11); pt.buf = &nsdata; pt.len = sizeof(nsdata); pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "identify request failed"); if (nvme_completion_is_error(&pt.cpl)) errx(1, "identify request returned error"); close(fd); data = (uint8_t *)&nsdata; for (i = 0; i < sizeof(nsdata); i++) { if (data[i] != 0) break; } if (i == sizeof(nsdata)) errx(1, "namespace %d is not allocated", identify_opt.nsid); /* Convert data to host endian */ nvme_namespace_data_swapbytes(&nsdata); if (identify_opt.hex) { i = sizeof(struct nvme_namespace_data); if (!identify_opt.verbose) { for (; i > 384; i--) { if (data[i - 1] != 0) break; } } print_hex(&nsdata, i); exit(0); } print_namespace(&nsdata); exit(0); } static void ns(const struct cmd *nf __unused, int argc, char *argv[]) { cmd_dispatch(argc, argv, &ns_cmd); } Index: stable/12/sbin/nvmecontrol/nsid.c =================================================================== --- stable/12/sbin/nvmecontrol/nsid.c (nonexistent) +++ stable/12/sbin/nvmecontrol/nsid.c (revision 350946) @@ -0,0 +1,80 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (C) 2019 Alexander Motin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include + +#include "nvmecontrol.h" +#include "comnd.h" + +/* Tables for command line parsing */ + +static cmd_fn_t gnsid; + +static struct nsid_options { + const char *dev; +} nsid_opt = { + .dev = NULL, +}; + +static const struct args nsid_args[] = { + { arg_string, &nsid_opt.dev, "namespace-id" }, + { arg_none, NULL, NULL }, +}; + +static struct cmd nsid_cmd = { + .name = "nsid", + .fn = gnsid, + .descr = "Get controller and NSID for namespace", + .ctx_size = sizeof(nsid_opt), + .opts = NULL, + .args = nsid_args, +}; + +CMD_COMMAND(nsid_cmd); + +static void +gnsid(const struct cmd *f, int argc, char *argv[]) +{ + char *path; + int fd; + uint32_t nsid; + + arg_parse(argc, argv, f); + + open_dev(nsid_opt.dev, &fd, 1, 1); + get_nsid(fd, &path, &nsid); + close(fd); + printf("%s\t%u\n", path, nsid); + free(path); +} Property changes on: stable/12/sbin/nvmecontrol/nsid.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/12/sbin/nvmecontrol/nvmecontrol.8 =================================================================== --- stable/12/sbin/nvmecontrol/nvmecontrol.8 (revision 350945) +++ stable/12/sbin/nvmecontrol/nvmecontrol.8 (revision 350946) @@ -1,321 +1,325 @@ .\" -.\" Copyright (c) 2018 Alexander Motin +.\" Copyright (c) 2018-2019 Alexander Motin .\" Copyright (c) 2012 Intel Corporation .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions, and the following disclaimer, .\" without modification. .\" 2. Redistributions in binary form must reproduce at minimum a disclaimer .\" substantially similar to the "NO WARRANTY" disclaimer below .\" ("Disclaimer") and any redistribution must be conditioned upon .\" including a substantially similar Disclaimer requirement for further .\" binary redistribution. .\" .\" NO WARRANTY .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS .\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT .\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR .\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT .\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, .\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING .\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGES. .\" .\" nvmecontrol man page. .\" .\" Author: Jim Harris .\" .\" $FreeBSD$ .\" -.Dd July 31, 2019 +.Dd August 1, 2019 .Dt NVMECONTROL 8 .Os .Sh NAME .Nm nvmecontrol .Nd NVM Express control utility .Sh SYNOPSIS .Nm .Ic devlist .Nm .Ic identify .Op Fl v .Op Fl x .Aq device id .Aq namespace id .Nm .Ic perftest .Aq Fl n Ar num_threads .Aq Fl o Ar read|write .Op Fl p .Aq Fl s Ar size_in_bytes .Aq Fl t Ar time_in_sec .Aq namespace id .Nm .Ic reset .Aq controller id .Nm .Ic logpage .Aq Fl p Ar page_id .Op Fl x .Op Fl v Ar vendor-string .Op Fl b .Aq device id .Aq namespace id .Nm .Ic ns active .Aq device id .Nm .Ic ns allocated .Aq device id .Nm .Ic ns attach .Aq Fl n Ar nsid .Aq Fl c Ar cntid .Aq device id .Nm .Ic ns attached .Aq Fl n Ar nsid .Aq device id .Nm .Ic ns controllers .Aq device id .Nm .Ic ns create .Aq Fl s Ar nsze .Op Fl c Ar ncap .Op Fl f Ar lbaf .Op Fl m Ar mset .Op Fl n Ar nmic .Op Fl p Ar pi .Op Fl l Ar pil .Op Fl L Ar flbas .Op Fl d Ar dps .Aq device id .Nm .Ic ns delete .Aq Fl n Ar nsid .Aq device id .Nm .Ic ns detach .Aq Fl n Ar nsid .Aq Fl c Ar cntid .Aq device id .Nm .Ic ns identify .Op Fl v .Op Fl x .Aq Fl n Ar nsid .Aq device id +.Nm +.Ic nsid +.Aq device id +.Aq namespace id .Nm .Ic firmware .Op Fl s Ar slot .Op Fl f Ar path_to_firmware .Op Fl a .Aq device id .Nm .Ic format .Op Fl f Ar fmt .Op Fl m Ar mset .Op Fl o Ar pi .Op Fl l Ar pil .Op Fl E .Op Fl C .Aq device id .Aq namespace id .Nm .Ic power .Op Fl l .Op Fl p power_state .Op Fl w workload_hint .Nm .Ic wdc cap-diag .Op Fl o path_template .Aq device id .Nm .Ic wdc drive-log .Op Fl o path_template .Aq device id .Nm .Ic wdc get-crash-dump .Op Fl o path_template .Aq device id .\" .Nm .\" .Ic wdc purge .\" .Aq device id .\" .Nm .\" .Ic wdc purge-monitor .\" .Aq device id .Sh DESCRIPTION NVM Express (NVMe) is a storage protocol standard, for SSDs and other high-speed storage devices over PCI Express. .Pp .Ss logpage The logpage command knows how to print log pages of various types. It also knows about vendor specific log pages from hgst/wdc and intel. Note that some vendors use the same log page numbers for different data. .Pp .Bl -tag -compact -width "Page 0x00" .It Dv Page 0x01 Drive Error Log .It Dv Page 0x02 Health/SMART Data .It Dv Page 0x03 Firmware Information .It Dv Page 0xc1 Advanced SMART information (WDC/HGST) .It Dv Page 0xc1 Read latency stats (Intel) .It Dv Page 0xc2 Wite latency stats (Intel) .It Dv Page 0xc5 Temperature stats (Intel) .It Dv Page 0xca Advanced SMART information (Intel) .El .Pp Specifying .Fl p .Ic help will list all valid vendors and pages. .Fl x will print the page as hex. .Fl b will print the binary data for the page. .Ss ns Various namespace management commands. If namespace management is supported by device, allow list, create and delete namespaces, list, attach and detach controllers to namespaces. .Ss format Format either specified namespace, or all namespaces of specified controller, using specified parameters: .Ar fmt LBA Format, .Ar mset Metadata Settings, .Ar pi Protection Information, .Ar pil Protection Information Location. When formatting specific namespace, existing values are used as defaults. When formatting all namespaces, all parameters should be specified. Some controllers may not support formatting or erasing specific or all namespaces. Option .Fl E enables User Data Erase during format. Option .Fl C enables Cryptographic Erase during format. .Ss wdc The various wdc command retrieve log data from the wdc/hgst drives. The .Fl o flag specifies a path template to use to output the files. Each file takes the path template (which defaults to nothing), appends the drive's serial number and the type of dump it is followed by .bin. These logs must be sent to the vendor for analysis. This tool only provides a way to extract them. .Sh EXAMPLES .Dl nvmecontrol devlist .Pp Display a list of NVMe controllers and namespaces along with their device nodes. .Pp .Dl nvmecontrol identify nvme0 .Pp Display a human-readable summary of the nvme0 IDENTIFY_CONTROLLER data. .Pp .Dl nvmecontrol identify -x -v nvme0ns1 .Pp Display an hexadecimal dump of the nvme0 IDENTIFY_NAMESPACE data for namespace 1. .Pp .Dl nvmecontrol perftest -n 32 -o read -s 512 -t 30 nvme0ns1 .Pp Run a performance test on nvme0ns1 using 32 kernel threads for 30 seconds. Each thread will issue a single 512 byte read command. Results are printed to stdout when 30 seconds expires. .Pp .Dl nvmecontrol reset nvme0 .Pp Perform a controller-level reset of the nvme0 controller. .Pp .Dl nvmecontrol logpage -p 1 nvme0 .Pp Display a human-readable summary of the nvme0 controller's Error Information Log. Log pages defined by the NVMe specification include Error Information Log (ID=1), SMART/Health Information Log (ID=2), and Firmware Slot Log (ID=3). .Pp .Dl nvmecontrol logpage -p 0xc1 -v wdc nvme0 .Pp Display a human-readable summary of the nvme0's wdc-specific advanced SMART data. .Pp .Dl nvmecontrol logpage -p 1 -x nvme0 .Pp Display a hexadecimal dump of the nvme0 controller's Error Information Log. .Pp .Dl nvmecontrol logpage -p 0xcb -b nvme0 > /tmp/page-cb.bin .Pp Print the contents of vendor specific page 0xcb as binary data on standard out. Redirect it to a temporary file. .Pp .Dl nvmecontrol firmware -s 2 -f /tmp/nvme_firmware nvme0 .Pp Download the firmware image contained in "/tmp/nvme_firmware" to slot 2 of the nvme0 controller, but do not activate the image. .Pp .Dl nvmecontrol firmware -s 4 -a nvme0 .Pp Activate the firmware in slot 4 of the nvme0 controller on the next reset. .Pp .Dl nvmecontrol firmware -s 7 -f /tmp/nvme_firmware -a nvme0 .Pp Download the firmware image contained in "/tmp/nvme_firmware" to slot 7 of the nvme0 controller and activate it on the next reset. .Pp .Dl nvmecontrol power -l nvme0 .Pp List all the current power modes. .Pp .Dl nvmecontrol power -p 3 nvme0 .Pp Set the current power mode. .Pp .Dl nvmecontrol power nvme0 .Pp Get the current power mode. .Sh DYNAMIC LOADING The directories .Pa /lib/nvmecontrol and .Pa /usr/local/lib/nvmecontrol are scanned for any .so files. These files are loaded. The members of the .Va top linker set are added to the top-level commands. The members of the .Va logpage linker set are added to the logpage parsers. .Sh HISTORY The .Nm utility appeared in .Fx 9.2 . .Sh AUTHORS .An -nosplit .Nm was developed by Intel and originally written by .An Jim Harris Aq Mt jimharris@FreeBSD.org . .Pp This man page was written by .An Jim Harris Aq Mt jimharris@FreeBSD.org . Index: stable/12/sbin/nvmecontrol/nvmecontrol.c =================================================================== --- stable/12/sbin/nvmecontrol/nvmecontrol.c (revision 350945) +++ stable/12/sbin/nvmecontrol/nvmecontrol.c (revision 350946) @@ -1,208 +1,188 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2012-2013 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "nvmecontrol.h" static void print_bytes(void *data, uint32_t length) { uint32_t i, j; uint8_t *p, *end; end = (uint8_t *)data + length; for (i = 0; i < length; i++) { p = (uint8_t *)data + (i*16); printf("%03x: ", i*16); for (j = 0; j < 16 && p < end; j++) printf("%02x ", *p++); if (p >= end) break; printf("\n"); } printf("\n"); } static void print_dwords(void *data, uint32_t length) { uint32_t *p; uint32_t i, j; p = (uint32_t *)data; length /= sizeof(uint32_t); for (i = 0; i < length; i+=8) { printf("%03x: ", i*4); for (j = 0; j < 8; j++) printf("%08x ", p[i+j]); printf("\n"); } printf("\n"); } void print_hex(void *data, uint32_t length) { if (length >= sizeof(uint32_t) || length % sizeof(uint32_t) == 0) print_dwords(data, length); else print_bytes(data, length); } void read_controller_data(int fd, struct nvme_controller_data *cdata) { struct nvme_pt_command pt; memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_IDENTIFY; pt.cmd.cdw10 = htole32(1); pt.buf = cdata; pt.len = sizeof(*cdata); pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "identify request failed"); /* Convert data to host endian */ nvme_controller_data_swapbytes(cdata); if (nvme_completion_is_error(&pt.cpl)) errx(1, "identify request returned error"); } void read_namespace_data(int fd, uint32_t nsid, struct nvme_namespace_data *nsdata) { struct nvme_pt_command pt; memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_IDENTIFY; pt.cmd.nsid = htole32(nsid); pt.cmd.cdw10 = htole32(0); pt.buf = nsdata; pt.len = sizeof(*nsdata); pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "identify request failed"); /* Convert data to host endian */ nvme_namespace_data_swapbytes(nsdata); if (nvme_completion_is_error(&pt.cpl)) errx(1, "identify request returned error"); } int open_dev(const char *str, int *fd, int show_error, int exit_on_error) { char full_path[64]; - if (!strnstr(str, NVME_CTRLR_PREFIX, strlen(NVME_CTRLR_PREFIX))) { - if (show_error) - warnx("controller/namespace ids must begin with '%s'", - NVME_CTRLR_PREFIX); - if (exit_on_error) - exit(1); - else - return (EINVAL); - } - snprintf(full_path, sizeof(full_path), _PATH_DEV"%s", str); *fd = open(full_path, O_RDWR); if (*fd < 0) { if (show_error) warn("could not open %s", full_path); if (exit_on_error) exit(1); else return (errno); } return (0); } void -parse_ns_str(const char *ns_str, char *ctrlr_str, uint32_t *nsid) +get_nsid(int fd, char **ctrlr_str, uint32_t *nsid) { - char *nsloc; + struct nvme_get_nsid gnsid; - /* - * Pull the namespace id from the string. +2 skips past the "ns" part - * of the string. Don't search past 10 characters into the string, - * otherwise we know it is malformed. - */ - nsloc = strnstr(ns_str, NVME_NS_PREFIX, 10); - if (nsloc != NULL) - *nsid = strtol(nsloc + 2, NULL, 10); - if (nsloc == NULL || (*nsid == 0 && errno != 0)) - errx(1, "invalid namespace ID '%s'", ns_str); - - /* - * The controller string will include only the nvmX part of the - * nvmeXnsY string. - */ - snprintf(ctrlr_str, nsloc - ns_str + 1, "%s", ns_str); + if (ioctl(fd, NVME_GET_NSID, &gnsid) < 0) + err(1, "NVME_GET_NSID ioctl failed"); + if (ctrlr_str != NULL) + *ctrlr_str = strndup(gnsid.cdev, sizeof(gnsid.cdev)); + if (nsid != NULL) + *nsid = gnsid.nsid; } int main(int argc, char *argv[]) { cmd_init(); cmd_load_dir("/lib/nvmecontrol", NULL, NULL); cmd_load_dir("/usr/local/lib/nvmecontrol", NULL, NULL); cmd_dispatch(argc, argv, NULL); return (0); } Index: stable/12/sbin/nvmecontrol/nvmecontrol.h =================================================================== --- stable/12/sbin/nvmecontrol/nvmecontrol.h (revision 350945) +++ stable/12/sbin/nvmecontrol/nvmecontrol.h (revision 350946) @@ -1,104 +1,104 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2012-2013 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __NVMECONTROL_H__ #define __NVMECONTROL_H__ #include #include "comnd.h" typedef void (*print_fn_t)(const struct nvme_controller_data *cdata, void *buf, uint32_t size); struct logpage_function { SLIST_ENTRY(logpage_function) link; uint8_t log_page; const char *vendor; const char *name; print_fn_t print_fn; size_t size; }; #define NVME_LOGPAGE(unique, lp, vend, nam, fn, sz) \ static struct logpage_function unique ## _lpf = { \ .log_page = lp, \ .vendor = vend, \ .name = nam, \ .print_fn = fn, \ .size = sz, \ } ; \ static void logpage_reg_##unique(void) __attribute__((constructor)); \ static void logpage_reg_##unique(void) { logpage_register(&unique##_lpf); } #define DEFAULT_SIZE (4096) struct kv_name { uint32_t key; const char *name; }; const char *kv_lookup(const struct kv_name *kv, size_t kv_count, uint32_t key); void logpage_register(struct logpage_function *p); #define NVME_CTRLR_PREFIX "nvme" #define NVME_NS_PREFIX "ns" int open_dev(const char *str, int *fd, int show_error, int exit_on_error); -void parse_ns_str(const char *ns_str, char *ctrlr_str, uint32_t *nsid); +void get_nsid(int fd, char **ctrlr_str, uint32_t *nsid); void read_controller_data(int fd, struct nvme_controller_data *cdata); void read_namespace_data(int fd, uint32_t nsid, struct nvme_namespace_data *nsdata); void print_hex(void *data, uint32_t length); void print_namespace(struct nvme_namespace_data *nsdata); void read_logpage(int fd, uint8_t log_page, uint32_t nsid, void *payload, uint32_t payload_size); void print_temp(uint16_t t); void print_intel_add_smart(const struct nvme_controller_data *cdata __unused, void *buf, uint32_t size __unused); /* Utility Routines */ /* * 128-bit integer augments to standard values. On i386 this * doesn't exist, so we use 64-bit values. So, on 32-bit i386, * you'll get truncated values until someone implement 128bit * ints in sofware. */ #define UINT128_DIG 39 #ifdef __i386__ typedef uint64_t uint128_t; #else typedef __uint128_t uint128_t; #endif static __inline uint128_t to128(void *p) { return *(uint128_t *)p; } uint64_t le48dec(const void *pp); char * uint128_to_str(uint128_t u, char *buf, size_t buflen); #endif Index: stable/12/sys/dev/nvme/nvme.h =================================================================== --- stable/12/sys/dev/nvme/nvme.h (revision 350945) +++ stable/12/sys/dev/nvme/nvme.h (revision 350946) @@ -1,1674 +1,1683 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2012-2013 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __NVME_H__ #define __NVME_H__ #ifdef _KERNEL #include #endif #include #include #define NVME_PASSTHROUGH_CMD _IOWR('n', 0, struct nvme_pt_command) #define NVME_RESET_CONTROLLER _IO('n', 1) +#define NVME_GET_NSID _IOR('n', 2, struct nvme_get_nsid) #define NVME_IO_TEST _IOWR('n', 100, struct nvme_io_test) #define NVME_BIO_TEST _IOWR('n', 101, struct nvme_io_test) /* * Macros to deal with NVME revisions, as defined VS register */ #define NVME_REV(x, y) (((x) << 16) | ((y) << 8)) #define NVME_MAJOR(r) (((r) >> 16) & 0xffff) #define NVME_MINOR(r) (((r) >> 8) & 0xff) /* * Use to mark a command to apply to all namespaces, or to retrieve global * log pages. */ #define NVME_GLOBAL_NAMESPACE_TAG ((uint32_t)0xFFFFFFFF) /* Cap nvme to 1MB transfers driver explodes with larger sizes */ #define NVME_MAX_XFER_SIZE (MAXPHYS < (1<<20) ? MAXPHYS : (1<<20)) /* Register field definitions */ #define NVME_CAP_LO_REG_MQES_SHIFT (0) #define NVME_CAP_LO_REG_MQES_MASK (0xFFFF) #define NVME_CAP_LO_REG_CQR_SHIFT (16) #define NVME_CAP_LO_REG_CQR_MASK (0x1) #define NVME_CAP_LO_REG_AMS_SHIFT (17) #define NVME_CAP_LO_REG_AMS_MASK (0x3) #define NVME_CAP_LO_REG_TO_SHIFT (24) #define NVME_CAP_LO_REG_TO_MASK (0xFF) #define NVME_CAP_LO_MQES(x) \ (((x) >> NVME_CAP_LO_REG_MQES_SHIFT) & NVME_CAP_LO_REG_MQES_MASK) #define NVME_CAP_LO_CQR(x) \ (((x) >> NVME_CAP_LO_REG_CQR_SHIFT) & NVME_CAP_LO_REG_CQR_MASK) #define NVME_CAP_LO_AMS(x) \ (((x) >> NVME_CAP_LO_REG_AMS_SHIFT) & NVME_CAP_LO_REG_AMS_MASK) #define NVME_CAP_LO_TO(x) \ (((x) >> NVME_CAP_LO_REG_TO_SHIFT) & NVME_CAP_LO_REG_TO_MASK) #define NVME_CAP_HI_REG_DSTRD_SHIFT (0) #define NVME_CAP_HI_REG_DSTRD_MASK (0xF) #define NVME_CAP_HI_REG_CSS_NVM_SHIFT (5) #define NVME_CAP_HI_REG_CSS_NVM_MASK (0x1) #define NVME_CAP_HI_REG_MPSMIN_SHIFT (16) #define NVME_CAP_HI_REG_MPSMIN_MASK (0xF) #define NVME_CAP_HI_REG_MPSMAX_SHIFT (20) #define NVME_CAP_HI_REG_MPSMAX_MASK (0xF) #define NVME_CAP_HI_DSTRD(x) \ (((x) >> NVME_CAP_HI_REG_DSTRD_SHIFT) & NVME_CAP_HI_REG_DSTRD_MASK) #define NVME_CAP_HI_CSS_NVM(x) \ (((x) >> NVME_CAP_HI_REG_CSS_NVM_SHIFT) & NVME_CAP_HI_REG_CSS_NVM_MASK) #define NVME_CAP_HI_MPSMIN(x) \ (((x) >> NVME_CAP_HI_REG_MPSMIN_SHIFT) & NVME_CAP_HI_REG_MPSMIN_MASK) #define NVME_CAP_HI_MPSMAX(x) \ (((x) >> NVME_CAP_HI_REG_MPSMAX_SHIFT) & NVME_CAP_HI_REG_MPSMAX_MASK) #define NVME_CC_REG_EN_SHIFT (0) #define NVME_CC_REG_EN_MASK (0x1) #define NVME_CC_REG_CSS_SHIFT (4) #define NVME_CC_REG_CSS_MASK (0x7) #define NVME_CC_REG_MPS_SHIFT (7) #define NVME_CC_REG_MPS_MASK (0xF) #define NVME_CC_REG_AMS_SHIFT (11) #define NVME_CC_REG_AMS_MASK (0x7) #define NVME_CC_REG_SHN_SHIFT (14) #define NVME_CC_REG_SHN_MASK (0x3) #define NVME_CC_REG_IOSQES_SHIFT (16) #define NVME_CC_REG_IOSQES_MASK (0xF) #define NVME_CC_REG_IOCQES_SHIFT (20) #define NVME_CC_REG_IOCQES_MASK (0xF) #define NVME_CSTS_REG_RDY_SHIFT (0) #define NVME_CSTS_REG_RDY_MASK (0x1) #define NVME_CSTS_REG_CFS_SHIFT (1) #define NVME_CSTS_REG_CFS_MASK (0x1) #define NVME_CSTS_REG_SHST_SHIFT (2) #define NVME_CSTS_REG_SHST_MASK (0x3) #define NVME_CSTS_GET_SHST(csts) (((csts) >> NVME_CSTS_REG_SHST_SHIFT) & NVME_CSTS_REG_SHST_MASK) #define NVME_AQA_REG_ASQS_SHIFT (0) #define NVME_AQA_REG_ASQS_MASK (0xFFF) #define NVME_AQA_REG_ACQS_SHIFT (16) #define NVME_AQA_REG_ACQS_MASK (0xFFF) /* Command field definitions */ #define NVME_CMD_FUSE_SHIFT (8) #define NVME_CMD_FUSE_MASK (0x3) #define NVME_STATUS_P_SHIFT (0) #define NVME_STATUS_P_MASK (0x1) #define NVME_STATUS_SC_SHIFT (1) #define NVME_STATUS_SC_MASK (0xFF) #define NVME_STATUS_SCT_SHIFT (9) #define NVME_STATUS_SCT_MASK (0x7) #define NVME_STATUS_M_SHIFT (14) #define NVME_STATUS_M_MASK (0x1) #define NVME_STATUS_DNR_SHIFT (15) #define NVME_STATUS_DNR_MASK (0x1) #define NVME_STATUS_GET_P(st) (((st) >> NVME_STATUS_P_SHIFT) & NVME_STATUS_P_MASK) #define NVME_STATUS_GET_SC(st) (((st) >> NVME_STATUS_SC_SHIFT) & NVME_STATUS_SC_MASK) #define NVME_STATUS_GET_SCT(st) (((st) >> NVME_STATUS_SCT_SHIFT) & NVME_STATUS_SCT_MASK) #define NVME_STATUS_GET_M(st) (((st) >> NVME_STATUS_M_SHIFT) & NVME_STATUS_M_MASK) #define NVME_STATUS_GET_DNR(st) (((st) >> NVME_STATUS_DNR_SHIFT) & NVME_STATUS_DNR_MASK) #define NVME_PWR_ST_MPS_SHIFT (0) #define NVME_PWR_ST_MPS_MASK (0x1) #define NVME_PWR_ST_NOPS_SHIFT (1) #define NVME_PWR_ST_NOPS_MASK (0x1) #define NVME_PWR_ST_RRT_SHIFT (0) #define NVME_PWR_ST_RRT_MASK (0x1F) #define NVME_PWR_ST_RRL_SHIFT (0) #define NVME_PWR_ST_RRL_MASK (0x1F) #define NVME_PWR_ST_RWT_SHIFT (0) #define NVME_PWR_ST_RWT_MASK (0x1F) #define NVME_PWR_ST_RWL_SHIFT (0) #define NVME_PWR_ST_RWL_MASK (0x1F) #define NVME_PWR_ST_IPS_SHIFT (6) #define NVME_PWR_ST_IPS_MASK (0x3) #define NVME_PWR_ST_APW_SHIFT (0) #define NVME_PWR_ST_APW_MASK (0x7) #define NVME_PWR_ST_APS_SHIFT (6) #define NVME_PWR_ST_APS_MASK (0x3) /** Controller Multi-path I/O and Namespace Sharing Capabilities */ /* More then one port */ #define NVME_CTRLR_DATA_MIC_MPORTS_SHIFT (0) #define NVME_CTRLR_DATA_MIC_MPORTS_MASK (0x1) /* More then one controller */ #define NVME_CTRLR_DATA_MIC_MCTRLRS_SHIFT (1) #define NVME_CTRLR_DATA_MIC_MCTRLRS_MASK (0x1) /* SR-IOV Virtual Function */ #define NVME_CTRLR_DATA_MIC_SRIOVVF_SHIFT (2) #define NVME_CTRLR_DATA_MIC_SRIOVVF_MASK (0x1) /* Asymmetric Namespace Access Reporting */ #define NVME_CTRLR_DATA_MIC_ANAR_SHIFT (3) #define NVME_CTRLR_DATA_MIC_ANAR_MASK (0x1) /** OACS - optional admin command support */ /* supports security send/receive commands */ #define NVME_CTRLR_DATA_OACS_SECURITY_SHIFT (0) #define NVME_CTRLR_DATA_OACS_SECURITY_MASK (0x1) /* supports format nvm command */ #define NVME_CTRLR_DATA_OACS_FORMAT_SHIFT (1) #define NVME_CTRLR_DATA_OACS_FORMAT_MASK (0x1) /* supports firmware activate/download commands */ #define NVME_CTRLR_DATA_OACS_FIRMWARE_SHIFT (2) #define NVME_CTRLR_DATA_OACS_FIRMWARE_MASK (0x1) /* supports namespace management commands */ #define NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT (3) #define NVME_CTRLR_DATA_OACS_NSMGMT_MASK (0x1) /* supports Device Self-test command */ #define NVME_CTRLR_DATA_OACS_SELFTEST_SHIFT (4) #define NVME_CTRLR_DATA_OACS_SELFTEST_MASK (0x1) /* supports Directives */ #define NVME_CTRLR_DATA_OACS_DIRECTIVES_SHIFT (5) #define NVME_CTRLR_DATA_OACS_DIRECTIVES_MASK (0x1) /* supports NVMe-MI Send/Receive */ #define NVME_CTRLR_DATA_OACS_NVMEMI_SHIFT (6) #define NVME_CTRLR_DATA_OACS_NVMEMI_MASK (0x1) /* supports Virtualization Management */ #define NVME_CTRLR_DATA_OACS_VM_SHIFT (7) #define NVME_CTRLR_DATA_OACS_VM_MASK (0x1) /* supports Doorbell Buffer Config */ #define NVME_CTRLR_DATA_OACS_DBBUFFER_SHIFT (8) #define NVME_CTRLR_DATA_OACS_DBBUFFER_MASK (0x1) /* supports Get LBA Status */ #define NVME_CTRLR_DATA_OACS_GETLBA_SHIFT (9) #define NVME_CTRLR_DATA_OACS_GETLBA_MASK (0x1) /** firmware updates */ /* first slot is read-only */ #define NVME_CTRLR_DATA_FRMW_SLOT1_RO_SHIFT (0) #define NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK (0x1) /* number of firmware slots */ #define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT (1) #define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK (0x7) /* firmware activation without reset */ #define NVME_CTRLR_DATA_FRMW_ACT_WO_RESET_SHIFT (4) #define NVME_CTRLR_DATA_FRMW_ACT_WO_RESET_MASK (0x1) /** log page attributes */ /* per namespace smart/health log page */ #define NVME_CTRLR_DATA_LPA_NS_SMART_SHIFT (0) #define NVME_CTRLR_DATA_LPA_NS_SMART_MASK (0x1) /** AVSCC - admin vendor specific command configuration */ /* admin vendor specific commands use spec format */ #define NVME_CTRLR_DATA_AVSCC_SPEC_FORMAT_SHIFT (0) #define NVME_CTRLR_DATA_AVSCC_SPEC_FORMAT_MASK (0x1) /** Autonomous Power State Transition Attributes */ /* Autonomous Power State Transitions supported */ #define NVME_CTRLR_DATA_APSTA_APST_SUPP_SHIFT (0) #define NVME_CTRLR_DATA_APSTA_APST_SUPP_MASK (0x1) /** Sanitize Capabilities */ /* Crypto Erase Support */ #define NVME_CTRLR_DATA_SANICAP_CES_SHIFT (0) #define NVME_CTRLR_DATA_SANICAP_CES_MASK (0x1) /* Block Erase Support */ #define NVME_CTRLR_DATA_SANICAP_BES_SHIFT (1) #define NVME_CTRLR_DATA_SANICAP_BES_MASK (0x1) /* Overwrite Support */ #define NVME_CTRLR_DATA_SANICAP_OWS_SHIFT (2) #define NVME_CTRLR_DATA_SANICAP_OWS_MASK (0x1) /* No-Deallocate Inhibited */ #define NVME_CTRLR_DATA_SANICAP_NDI_SHIFT (29) #define NVME_CTRLR_DATA_SANICAP_NDI_MASK (0x1) /* No-Deallocate Modifies Media After Sanitize */ #define NVME_CTRLR_DATA_SANICAP_NODMMAS_SHIFT (30) #define NVME_CTRLR_DATA_SANICAP_NODMMAS_MASK (0x3) #define NVME_CTRLR_DATA_SANICAP_NODMMAS_UNDEF (0) #define NVME_CTRLR_DATA_SANICAP_NODMMAS_NO (1) #define NVME_CTRLR_DATA_SANICAP_NODMMAS_YES (2) /** submission queue entry size */ #define NVME_CTRLR_DATA_SQES_MIN_SHIFT (0) #define NVME_CTRLR_DATA_SQES_MIN_MASK (0xF) #define NVME_CTRLR_DATA_SQES_MAX_SHIFT (4) #define NVME_CTRLR_DATA_SQES_MAX_MASK (0xF) /** completion queue entry size */ #define NVME_CTRLR_DATA_CQES_MIN_SHIFT (0) #define NVME_CTRLR_DATA_CQES_MIN_MASK (0xF) #define NVME_CTRLR_DATA_CQES_MAX_SHIFT (4) #define NVME_CTRLR_DATA_CQES_MAX_MASK (0xF) /** optional nvm command support */ #define NVME_CTRLR_DATA_ONCS_COMPARE_SHIFT (0) #define NVME_CTRLR_DATA_ONCS_COMPARE_MASK (0x1) #define NVME_CTRLR_DATA_ONCS_WRITE_UNC_SHIFT (1) #define NVME_CTRLR_DATA_ONCS_WRITE_UNC_MASK (0x1) #define NVME_CTRLR_DATA_ONCS_DSM_SHIFT (2) #define NVME_CTRLR_DATA_ONCS_DSM_MASK (0x1) #define NVME_CTRLR_DATA_ONCS_WRZERO_SHIFT (3) #define NVME_CTRLR_DATA_ONCS_WRZERO_MASK (0x1) #define NVME_CTRLR_DATA_ONCS_SAVEFEAT_SHIFT (4) #define NVME_CTRLR_DATA_ONCS_SAVEFEAT_MASK (0x1) #define NVME_CTRLR_DATA_ONCS_RESERV_SHIFT (5) #define NVME_CTRLR_DATA_ONCS_RESERV_MASK (0x1) #define NVME_CTRLR_DATA_ONCS_TIMESTAMP_SHIFT (6) #define NVME_CTRLR_DATA_ONCS_TIMESTAMP_MASK (0x1) #define NVME_CTRLR_DATA_ONCS_VERIFY_SHIFT (7) #define NVME_CTRLR_DATA_ONCS_VERIFY_MASK (0x1) /** Fused Operation Support */ #define NVME_CTRLR_DATA_FUSES_CNW_SHIFT (0) #define NVME_CTRLR_DATA_FUSES_CNW_MASK (0x1) /** Format NVM Attributes */ #define NVME_CTRLR_DATA_FNA_FORMAT_ALL_SHIFT (0) #define NVME_CTRLR_DATA_FNA_FORMAT_ALL_MASK (0x1) #define NVME_CTRLR_DATA_FNA_ERASE_ALL_SHIFT (1) #define NVME_CTRLR_DATA_FNA_ERASE_ALL_MASK (0x1) #define NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_SHIFT (2) #define NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_MASK (0x1) /** volatile write cache */ /* volatile write cache present */ #define NVME_CTRLR_DATA_VWC_PRESENT_SHIFT (0) #define NVME_CTRLR_DATA_VWC_PRESENT_MASK (0x1) /* flush all namespaces supported */ #define NVME_CTRLR_DATA_VWC_ALL_SHIFT (1) #define NVME_CTRLR_DATA_VWC_ALL_MASK (0x3) #define NVME_CTRLR_DATA_VWC_ALL_UNKNOWN (0) #define NVME_CTRLR_DATA_VWC_ALL_NO (2) #define NVME_CTRLR_DATA_VWC_ALL_YES (3) /** namespace features */ /* thin provisioning */ #define NVME_NS_DATA_NSFEAT_THIN_PROV_SHIFT (0) #define NVME_NS_DATA_NSFEAT_THIN_PROV_MASK (0x1) /* NAWUN, NAWUPF, and NACWU fields are valid */ #define NVME_NS_DATA_NSFEAT_NA_FIELDS_SHIFT (1) #define NVME_NS_DATA_NSFEAT_NA_FIELDS_MASK (0x1) /* Deallocated or Unwritten Logical Block errors supported */ #define NVME_NS_DATA_NSFEAT_DEALLOC_SHIFT (2) #define NVME_NS_DATA_NSFEAT_DEALLOC_MASK (0x1) /* NGUID and EUI64 fields are not reusable */ #define NVME_NS_DATA_NSFEAT_NO_ID_REUSE_SHIFT (3) #define NVME_NS_DATA_NSFEAT_NO_ID_REUSE_MASK (0x1) /* NPWG, NPWA, NPDG, NPDA, and NOWS are valid */ #define NVME_NS_DATA_NSFEAT_NPVALID_SHIFT (4) #define NVME_NS_DATA_NSFEAT_NPVALID_MASK (0x1) /** formatted lba size */ #define NVME_NS_DATA_FLBAS_FORMAT_SHIFT (0) #define NVME_NS_DATA_FLBAS_FORMAT_MASK (0xF) #define NVME_NS_DATA_FLBAS_EXTENDED_SHIFT (4) #define NVME_NS_DATA_FLBAS_EXTENDED_MASK (0x1) /** metadata capabilities */ /* metadata can be transferred as part of data prp list */ #define NVME_NS_DATA_MC_EXTENDED_SHIFT (0) #define NVME_NS_DATA_MC_EXTENDED_MASK (0x1) /* metadata can be transferred with separate metadata pointer */ #define NVME_NS_DATA_MC_POINTER_SHIFT (1) #define NVME_NS_DATA_MC_POINTER_MASK (0x1) /** end-to-end data protection capabilities */ /* protection information type 1 */ #define NVME_NS_DATA_DPC_PIT1_SHIFT (0) #define NVME_NS_DATA_DPC_PIT1_MASK (0x1) /* protection information type 2 */ #define NVME_NS_DATA_DPC_PIT2_SHIFT (1) #define NVME_NS_DATA_DPC_PIT2_MASK (0x1) /* protection information type 3 */ #define NVME_NS_DATA_DPC_PIT3_SHIFT (2) #define NVME_NS_DATA_DPC_PIT3_MASK (0x1) /* first eight bytes of metadata */ #define NVME_NS_DATA_DPC_MD_START_SHIFT (3) #define NVME_NS_DATA_DPC_MD_START_MASK (0x1) /* last eight bytes of metadata */ #define NVME_NS_DATA_DPC_MD_END_SHIFT (4) #define NVME_NS_DATA_DPC_MD_END_MASK (0x1) /** end-to-end data protection type settings */ /* protection information type */ #define NVME_NS_DATA_DPS_PIT_SHIFT (0) #define NVME_NS_DATA_DPS_PIT_MASK (0x7) /* 1 == protection info transferred at start of metadata */ /* 0 == protection info transferred at end of metadata */ #define NVME_NS_DATA_DPS_MD_START_SHIFT (3) #define NVME_NS_DATA_DPS_MD_START_MASK (0x1) /** Namespace Multi-path I/O and Namespace Sharing Capabilities */ /* the namespace may be attached to two or more controllers */ #define NVME_NS_DATA_NMIC_MAY_BE_SHARED_SHIFT (0) #define NVME_NS_DATA_NMIC_MAY_BE_SHARED_MASK (0x1) /** Reservation Capabilities */ /* Persist Through Power Loss */ #define NVME_NS_DATA_RESCAP_PTPL_SHIFT (0) #define NVME_NS_DATA_RESCAP_PTPL_MASK (0x1) /* supports the Write Exclusive */ #define NVME_NS_DATA_RESCAP_WR_EX_SHIFT (1) #define NVME_NS_DATA_RESCAP_WR_EX_MASK (0x1) /* supports the Exclusive Access */ #define NVME_NS_DATA_RESCAP_EX_AC_SHIFT (2) #define NVME_NS_DATA_RESCAP_EX_AC_MASK (0x1) /* supports the Write Exclusive – Registrants Only */ #define NVME_NS_DATA_RESCAP_WR_EX_RO_SHIFT (3) #define NVME_NS_DATA_RESCAP_WR_EX_RO_MASK (0x1) /* supports the Exclusive Access - Registrants Only */ #define NVME_NS_DATA_RESCAP_EX_AC_RO_SHIFT (4) #define NVME_NS_DATA_RESCAP_EX_AC_RO_MASK (0x1) /* supports the Write Exclusive – All Registrants */ #define NVME_NS_DATA_RESCAP_WR_EX_AR_SHIFT (5) #define NVME_NS_DATA_RESCAP_WR_EX_AR_MASK (0x1) /* supports the Exclusive Access - All Registrants */ #define NVME_NS_DATA_RESCAP_EX_AC_AR_SHIFT (6) #define NVME_NS_DATA_RESCAP_EX_AC_AR_MASK (0x1) /* Ignore Existing Key is used as defined in revision 1.3 or later */ #define NVME_NS_DATA_RESCAP_IEKEY13_SHIFT (7) #define NVME_NS_DATA_RESCAP_IEKEY13_MASK (0x1) /** Format Progress Indicator */ /* percentage of the Format NVM command that remains to be completed */ #define NVME_NS_DATA_FPI_PERC_SHIFT (0) #define NVME_NS_DATA_FPI_PERC_MASK (0x7f) /* namespace supports the Format Progress Indicator */ #define NVME_NS_DATA_FPI_SUPP_SHIFT (7) #define NVME_NS_DATA_FPI_SUPP_MASK (0x1) /** Deallocate Logical Block Features */ /* deallocated logical block read behavior */ #define NVME_NS_DATA_DLFEAT_READ_SHIFT (0) #define NVME_NS_DATA_DLFEAT_READ_MASK (0x07) #define NVME_NS_DATA_DLFEAT_READ_NR (0x00) #define NVME_NS_DATA_DLFEAT_READ_00 (0x01) #define NVME_NS_DATA_DLFEAT_READ_FF (0x02) /* supports the Deallocate bit in the Write Zeroes */ #define NVME_NS_DATA_DLFEAT_DWZ_SHIFT (3) #define NVME_NS_DATA_DLFEAT_DWZ_MASK (0x01) /* Guard field for deallocated logical blocks is set to the CRC */ #define NVME_NS_DATA_DLFEAT_GCRC_SHIFT (4) #define NVME_NS_DATA_DLFEAT_GCRC_MASK (0x01) /** lba format support */ /* metadata size */ #define NVME_NS_DATA_LBAF_MS_SHIFT (0) #define NVME_NS_DATA_LBAF_MS_MASK (0xFFFF) /* lba data size */ #define NVME_NS_DATA_LBAF_LBADS_SHIFT (16) #define NVME_NS_DATA_LBAF_LBADS_MASK (0xFF) /* relative performance */ #define NVME_NS_DATA_LBAF_RP_SHIFT (24) #define NVME_NS_DATA_LBAF_RP_MASK (0x3) enum nvme_critical_warning_state { NVME_CRIT_WARN_ST_AVAILABLE_SPARE = 0x1, NVME_CRIT_WARN_ST_TEMPERATURE = 0x2, NVME_CRIT_WARN_ST_DEVICE_RELIABILITY = 0x4, NVME_CRIT_WARN_ST_READ_ONLY = 0x8, NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP = 0x10, }; #define NVME_CRIT_WARN_ST_RESERVED_MASK (0xE0) /* slot for current FW */ #define NVME_FIRMWARE_PAGE_AFI_SLOT_SHIFT (0) #define NVME_FIRMWARE_PAGE_AFI_SLOT_MASK (0x7) /* CC register SHN field values */ enum shn_value { NVME_SHN_NORMAL = 0x1, NVME_SHN_ABRUPT = 0x2, }; /* CSTS register SHST field values */ enum shst_value { NVME_SHST_NORMAL = 0x0, NVME_SHST_OCCURRING = 0x1, NVME_SHST_COMPLETE = 0x2, }; struct nvme_registers { /** controller capabilities */ uint32_t cap_lo; uint32_t cap_hi; uint32_t vs; /* version */ uint32_t intms; /* interrupt mask set */ uint32_t intmc; /* interrupt mask clear */ /** controller configuration */ uint32_t cc; uint32_t reserved1; /** controller status */ uint32_t csts; uint32_t reserved2; /** admin queue attributes */ uint32_t aqa; uint64_t asq; /* admin submission queue base addr */ uint64_t acq; /* admin completion queue base addr */ uint32_t reserved3[0x3f2]; struct { uint32_t sq_tdbl; /* submission queue tail doorbell */ uint32_t cq_hdbl; /* completion queue head doorbell */ } doorbell[1] __packed; } __packed; _Static_assert(sizeof(struct nvme_registers) == 0x1008, "bad size for nvme_registers"); struct nvme_command { /* dword 0 */ uint8_t opc; /* opcode */ uint8_t fuse; /* fused operation */ uint16_t cid; /* command identifier */ /* dword 1 */ uint32_t nsid; /* namespace identifier */ /* dword 2-3 */ uint32_t rsvd2; uint32_t rsvd3; /* dword 4-5 */ uint64_t mptr; /* metadata pointer */ /* dword 6-7 */ uint64_t prp1; /* prp entry 1 */ /* dword 8-9 */ uint64_t prp2; /* prp entry 2 */ /* dword 10-15 */ uint32_t cdw10; /* command-specific */ uint32_t cdw11; /* command-specific */ uint32_t cdw12; /* command-specific */ uint32_t cdw13; /* command-specific */ uint32_t cdw14; /* command-specific */ uint32_t cdw15; /* command-specific */ } __packed; _Static_assert(sizeof(struct nvme_command) == 16 * 4, "bad size for nvme_command"); struct nvme_completion { /* dword 0 */ uint32_t cdw0; /* command-specific */ /* dword 1 */ uint32_t rsvd1; /* dword 2 */ uint16_t sqhd; /* submission queue head pointer */ uint16_t sqid; /* submission queue identifier */ /* dword 3 */ uint16_t cid; /* command identifier */ uint16_t status; } __packed; _Static_assert(sizeof(struct nvme_completion) == 4 * 4, "bad size for nvme_completion"); struct nvme_dsm_range { uint32_t attributes; uint32_t length; uint64_t starting_lba; } __packed; /* Largest DSM Trim that can be done */ #define NVME_MAX_DSM_TRIM 4096 _Static_assert(sizeof(struct nvme_dsm_range) == 16, "bad size for nvme_dsm_ranage"); /* status code types */ enum nvme_status_code_type { NVME_SCT_GENERIC = 0x0, NVME_SCT_COMMAND_SPECIFIC = 0x1, NVME_SCT_MEDIA_ERROR = 0x2, /* 0x3-0x6 - reserved */ NVME_SCT_VENDOR_SPECIFIC = 0x7, }; /* generic command status codes */ enum nvme_generic_command_status_code { NVME_SC_SUCCESS = 0x00, NVME_SC_INVALID_OPCODE = 0x01, NVME_SC_INVALID_FIELD = 0x02, NVME_SC_COMMAND_ID_CONFLICT = 0x03, NVME_SC_DATA_TRANSFER_ERROR = 0x04, NVME_SC_ABORTED_POWER_LOSS = 0x05, NVME_SC_INTERNAL_DEVICE_ERROR = 0x06, NVME_SC_ABORTED_BY_REQUEST = 0x07, NVME_SC_ABORTED_SQ_DELETION = 0x08, NVME_SC_ABORTED_FAILED_FUSED = 0x09, NVME_SC_ABORTED_MISSING_FUSED = 0x0a, NVME_SC_INVALID_NAMESPACE_OR_FORMAT = 0x0b, NVME_SC_COMMAND_SEQUENCE_ERROR = 0x0c, NVME_SC_INVALID_SGL_SEGMENT_DESCR = 0x0d, NVME_SC_INVALID_NUMBER_OF_SGL_DESCR = 0x0e, NVME_SC_DATA_SGL_LENGTH_INVALID = 0x0f, NVME_SC_METADATA_SGL_LENGTH_INVALID = 0x10, NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID = 0x11, NVME_SC_INVALID_USE_OF_CMB = 0x12, NVME_SC_PRP_OFFET_INVALID = 0x13, NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED = 0x14, NVME_SC_OPERATION_DENIED = 0x15, NVME_SC_SGL_OFFSET_INVALID = 0x16, /* 0x17 - reserved */ NVME_SC_HOST_ID_INCONSISTENT_FORMAT = 0x18, NVME_SC_KEEP_ALIVE_TIMEOUT_EXPIRED = 0x19, NVME_SC_KEEP_ALIVE_TIMEOUT_INVALID = 0x1a, NVME_SC_ABORTED_DUE_TO_PREEMPT = 0x1b, NVME_SC_SANITIZE_FAILED = 0x1c, NVME_SC_SANITIZE_IN_PROGRESS = 0x1d, NVME_SC_SGL_DATA_BLOCK_GRAN_INVALID = 0x1e, NVME_SC_NOT_SUPPORTED_IN_CMB = 0x1f, NVME_SC_LBA_OUT_OF_RANGE = 0x80, NVME_SC_CAPACITY_EXCEEDED = 0x81, NVME_SC_NAMESPACE_NOT_READY = 0x82, NVME_SC_RESERVATION_CONFLICT = 0x83, NVME_SC_FORMAT_IN_PROGRESS = 0x84, }; /* command specific status codes */ enum nvme_command_specific_status_code { NVME_SC_COMPLETION_QUEUE_INVALID = 0x00, NVME_SC_INVALID_QUEUE_IDENTIFIER = 0x01, NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED = 0x02, NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED = 0x03, /* 0x04 - reserved */ NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED = 0x05, NVME_SC_INVALID_FIRMWARE_SLOT = 0x06, NVME_SC_INVALID_FIRMWARE_IMAGE = 0x07, NVME_SC_INVALID_INTERRUPT_VECTOR = 0x08, NVME_SC_INVALID_LOG_PAGE = 0x09, NVME_SC_INVALID_FORMAT = 0x0a, NVME_SC_FIRMWARE_REQUIRES_RESET = 0x0b, NVME_SC_INVALID_QUEUE_DELETION = 0x0c, NVME_SC_FEATURE_NOT_SAVEABLE = 0x0d, NVME_SC_FEATURE_NOT_CHANGEABLE = 0x0e, NVME_SC_FEATURE_NOT_NS_SPECIFIC = 0x0f, NVME_SC_FW_ACT_REQUIRES_NVMS_RESET = 0x10, NVME_SC_FW_ACT_REQUIRES_RESET = 0x11, NVME_SC_FW_ACT_REQUIRES_TIME = 0x12, NVME_SC_FW_ACT_PROHIBITED = 0x13, NVME_SC_OVERLAPPING_RANGE = 0x14, NVME_SC_NS_INSUFFICIENT_CAPACITY = 0x15, NVME_SC_NS_ID_UNAVAILABLE = 0x16, /* 0x17 - reserved */ NVME_SC_NS_ALREADY_ATTACHED = 0x18, NVME_SC_NS_IS_PRIVATE = 0x19, NVME_SC_NS_NOT_ATTACHED = 0x1a, NVME_SC_THIN_PROV_NOT_SUPPORTED = 0x1b, NVME_SC_CTRLR_LIST_INVALID = 0x1c, NVME_SC_SELT_TEST_IN_PROGRESS = 0x1d, NVME_SC_BOOT_PART_WRITE_PROHIB = 0x1e, NVME_SC_INVALID_CTRLR_ID = 0x1f, NVME_SC_INVALID_SEC_CTRLR_STATE = 0x20, NVME_SC_INVALID_NUM_OF_CTRLR_RESRC = 0x21, NVME_SC_INVALID_RESOURCE_ID = 0x22, NVME_SC_CONFLICTING_ATTRIBUTES = 0x80, NVME_SC_INVALID_PROTECTION_INFO = 0x81, NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE = 0x82, }; /* media error status codes */ enum nvme_media_error_status_code { NVME_SC_WRITE_FAULTS = 0x80, NVME_SC_UNRECOVERED_READ_ERROR = 0x81, NVME_SC_GUARD_CHECK_ERROR = 0x82, NVME_SC_APPLICATION_TAG_CHECK_ERROR = 0x83, NVME_SC_REFERENCE_TAG_CHECK_ERROR = 0x84, NVME_SC_COMPARE_FAILURE = 0x85, NVME_SC_ACCESS_DENIED = 0x86, NVME_SC_DEALLOCATED_OR_UNWRITTEN = 0x87, }; /* admin opcodes */ enum nvme_admin_opcode { NVME_OPC_DELETE_IO_SQ = 0x00, NVME_OPC_CREATE_IO_SQ = 0x01, NVME_OPC_GET_LOG_PAGE = 0x02, /* 0x03 - reserved */ NVME_OPC_DELETE_IO_CQ = 0x04, NVME_OPC_CREATE_IO_CQ = 0x05, NVME_OPC_IDENTIFY = 0x06, /* 0x07 - reserved */ NVME_OPC_ABORT = 0x08, NVME_OPC_SET_FEATURES = 0x09, NVME_OPC_GET_FEATURES = 0x0a, /* 0x0b - reserved */ NVME_OPC_ASYNC_EVENT_REQUEST = 0x0c, NVME_OPC_NAMESPACE_MANAGEMENT = 0x0d, /* 0x0e-0x0f - reserved */ NVME_OPC_FIRMWARE_ACTIVATE = 0x10, NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD = 0x11, NVME_OPC_DEVICE_SELF_TEST = 0x14, NVME_OPC_NAMESPACE_ATTACHMENT = 0x15, NVME_OPC_KEEP_ALIVE = 0x18, NVME_OPC_DIRECTIVE_SEND = 0x19, NVME_OPC_DIRECTIVE_RECEIVE = 0x1a, NVME_OPC_VIRTUALIZATION_MANAGEMENT = 0x1c, NVME_OPC_NVME_MI_SEND = 0x1d, NVME_OPC_NVME_MI_RECEIVE = 0x1e, NVME_OPC_DOORBELL_BUFFER_CONFIG = 0x7c, NVME_OPC_FORMAT_NVM = 0x80, NVME_OPC_SECURITY_SEND = 0x81, NVME_OPC_SECURITY_RECEIVE = 0x82, NVME_OPC_SANITIZE = 0x84, }; /* nvme nvm opcodes */ enum nvme_nvm_opcode { NVME_OPC_FLUSH = 0x00, NVME_OPC_WRITE = 0x01, NVME_OPC_READ = 0x02, /* 0x03 - reserved */ NVME_OPC_WRITE_UNCORRECTABLE = 0x04, NVME_OPC_COMPARE = 0x05, /* 0x06 - reserved */ NVME_OPC_WRITE_ZEROES = 0x08, /* 0x07 - reserved */ NVME_OPC_DATASET_MANAGEMENT = 0x09, /* 0x0a-0x0c - reserved */ NVME_OPC_RESERVATION_REGISTER = 0x0d, NVME_OPC_RESERVATION_REPORT = 0x0e, /* 0x0f-0x10 - reserved */ NVME_OPC_RESERVATION_ACQUIRE = 0x11, /* 0x12-0x14 - reserved */ NVME_OPC_RESERVATION_RELEASE = 0x15, }; enum nvme_feature { /* 0x00 - reserved */ NVME_FEAT_ARBITRATION = 0x01, NVME_FEAT_POWER_MANAGEMENT = 0x02, NVME_FEAT_LBA_RANGE_TYPE = 0x03, NVME_FEAT_TEMPERATURE_THRESHOLD = 0x04, NVME_FEAT_ERROR_RECOVERY = 0x05, NVME_FEAT_VOLATILE_WRITE_CACHE = 0x06, NVME_FEAT_NUMBER_OF_QUEUES = 0x07, NVME_FEAT_INTERRUPT_COALESCING = 0x08, NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION = 0x09, NVME_FEAT_WRITE_ATOMICITY = 0x0A, NVME_FEAT_ASYNC_EVENT_CONFIGURATION = 0x0B, NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION = 0x0C, NVME_FEAT_HOST_MEMORY_BUFFER = 0x0D, NVME_FEAT_TIMESTAMP = 0x0E, NVME_FEAT_KEEP_ALIVE_TIMER = 0x0F, NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT = 0x10, NVME_FEAT_NON_OP_POWER_STATE_CONFIG = 0x11, /* 0x12-0x77 - reserved */ /* 0x78-0x7f - NVMe Management Interface */ NVME_FEAT_SOFTWARE_PROGRESS_MARKER = 0x80, /* 0x81-0xBF - command set specific (reserved) */ /* 0xC0-0xFF - vendor specific */ }; enum nvme_dsm_attribute { NVME_DSM_ATTR_INTEGRAL_READ = 0x1, NVME_DSM_ATTR_INTEGRAL_WRITE = 0x2, NVME_DSM_ATTR_DEALLOCATE = 0x4, }; enum nvme_activate_action { NVME_AA_REPLACE_NO_ACTIVATE = 0x0, NVME_AA_REPLACE_ACTIVATE = 0x1, NVME_AA_ACTIVATE = 0x2, }; struct nvme_power_state { /** Maximum Power */ uint16_t mp; /* Maximum Power */ uint8_t ps_rsvd1; uint8_t mps_nops; /* Max Power Scale, Non-Operational State */ uint32_t enlat; /* Entry Latency */ uint32_t exlat; /* Exit Latency */ uint8_t rrt; /* Relative Read Throughput */ uint8_t rrl; /* Relative Read Latency */ uint8_t rwt; /* Relative Write Throughput */ uint8_t rwl; /* Relative Write Latency */ uint16_t idlp; /* Idle Power */ uint8_t ips; /* Idle Power Scale */ uint8_t ps_rsvd8; uint16_t actp; /* Active Power */ uint8_t apw_aps; /* Active Power Workload, Active Power Scale */ uint8_t ps_rsvd10[9]; } __packed; _Static_assert(sizeof(struct nvme_power_state) == 32, "bad size for nvme_power_state"); #define NVME_SERIAL_NUMBER_LENGTH 20 #define NVME_MODEL_NUMBER_LENGTH 40 #define NVME_FIRMWARE_REVISION_LENGTH 8 struct nvme_controller_data { /* bytes 0-255: controller capabilities and features */ /** pci vendor id */ uint16_t vid; /** pci subsystem vendor id */ uint16_t ssvid; /** serial number */ uint8_t sn[NVME_SERIAL_NUMBER_LENGTH]; /** model number */ uint8_t mn[NVME_MODEL_NUMBER_LENGTH]; /** firmware revision */ uint8_t fr[NVME_FIRMWARE_REVISION_LENGTH]; /** recommended arbitration burst */ uint8_t rab; /** ieee oui identifier */ uint8_t ieee[3]; /** multi-interface capabilities */ uint8_t mic; /** maximum data transfer size */ uint8_t mdts; /** Controller ID */ uint16_t ctrlr_id; /** Version */ uint32_t ver; /** RTD3 Resume Latency */ uint32_t rtd3r; /** RTD3 Enter Latency */ uint32_t rtd3e; /** Optional Asynchronous Events Supported */ uint32_t oaes; /* bitfield really */ /** Controller Attributes */ uint32_t ctratt; /* bitfield really */ /** Read Recovery Levels Supported */ uint16_t rrls; uint8_t reserved1[9]; /** Controller Type */ uint8_t cntrltype; /** FRU Globally Unique Identifier */ uint8_t fguid[16]; /** Command Retry Delay Time 1 */ uint16_t crdt1; /** Command Retry Delay Time 2 */ uint16_t crdt2; /** Command Retry Delay Time 3 */ uint16_t crdt3; uint8_t reserved2[122]; /* bytes 256-511: admin command set attributes */ /** optional admin command support */ uint16_t oacs; /** abort command limit */ uint8_t acl; /** asynchronous event request limit */ uint8_t aerl; /** firmware updates */ uint8_t frmw; /** log page attributes */ uint8_t lpa; /** error log page entries */ uint8_t elpe; /** number of power states supported */ uint8_t npss; /** admin vendor specific command configuration */ uint8_t avscc; /** Autonomous Power State Transition Attributes */ uint8_t apsta; /** Warning Composite Temperature Threshold */ uint16_t wctemp; /** Critical Composite Temperature Threshold */ uint16_t cctemp; /** Maximum Time for Firmware Activation */ uint16_t mtfa; /** Host Memory Buffer Preferred Size */ uint32_t hmpre; /** Host Memory Buffer Minimum Size */ uint32_t hmmin; /** Name space capabilities */ struct { /* if nsmgmt, report tnvmcap and unvmcap */ uint8_t tnvmcap[16]; uint8_t unvmcap[16]; } __packed untncap; /** Replay Protected Memory Block Support */ uint32_t rpmbs; /* Really a bitfield */ /** Extended Device Self-test Time */ uint16_t edstt; /** Device Self-test Options */ uint8_t dsto; /* Really a bitfield */ /** Firmware Update Granularity */ uint8_t fwug; /** Keep Alive Support */ uint16_t kas; /** Host Controlled Thermal Management Attributes */ uint16_t hctma; /* Really a bitfield */ /** Minimum Thermal Management Temperature */ uint16_t mntmt; /** Maximum Thermal Management Temperature */ uint16_t mxtmt; /** Sanitize Capabilities */ uint32_t sanicap; /* Really a bitfield */ /** Host Memory Buffer Minimum Descriptor Entry Size */ uint32_t hmminds; /** Host Memory Maximum Descriptors Entries */ uint16_t hmmaxd; /** NVM Set Identifier Maximum */ uint16_t nsetidmax; /** Endurance Group Identifier Maximum */ uint16_t endgidmax; /** ANA Transition Time */ uint8_t anatt; /** Asymmetric Namespace Access Capabilities */ uint8_t anacap; /** ANA Group Identifier Maximum */ uint32_t anagrpmax; /** Number of ANA Group Identifiers */ uint32_t nanagrpid; /** Persistent Event Log Size */ uint32_t pels; uint8_t reserved3[156]; /* bytes 512-703: nvm command set attributes */ /** submission queue entry size */ uint8_t sqes; /** completion queue entry size */ uint8_t cqes; /** Maximum Outstanding Commands */ uint16_t maxcmd; /** number of namespaces */ uint32_t nn; /** optional nvm command support */ uint16_t oncs; /** fused operation support */ uint16_t fuses; /** format nvm attributes */ uint8_t fna; /** volatile write cache */ uint8_t vwc; /** Atomic Write Unit Normal */ uint16_t awun; /** Atomic Write Unit Power Fail */ uint16_t awupf; /** NVM Vendor Specific Command Configuration */ uint8_t nvscc; /** Namespace Write Protection Capabilities */ uint8_t nwpc; /** Atomic Compare & Write Unit */ uint16_t acwu; uint16_t reserved6; /** SGL Support */ uint32_t sgls; /** Maximum Number of Allowed Namespaces */ uint32_t mnan; /* bytes 540-767: Reserved */ uint8_t reserved7[224]; /** NVM Subsystem NVMe Qualified Name */ uint8_t subnqn[256]; /* bytes 1024-1791: Reserved */ uint8_t reserved8[768]; /* bytes 1792-2047: NVMe over Fabrics specification */ uint8_t reserved9[256]; /* bytes 2048-3071: power state descriptors */ struct nvme_power_state power_state[32]; /* bytes 3072-4095: vendor specific */ uint8_t vs[1024]; } __packed __aligned(4); _Static_assert(sizeof(struct nvme_controller_data) == 4096, "bad size for nvme_controller_data"); struct nvme_namespace_data { /** namespace size */ uint64_t nsze; /** namespace capacity */ uint64_t ncap; /** namespace utilization */ uint64_t nuse; /** namespace features */ uint8_t nsfeat; /** number of lba formats */ uint8_t nlbaf; /** formatted lba size */ uint8_t flbas; /** metadata capabilities */ uint8_t mc; /** end-to-end data protection capabilities */ uint8_t dpc; /** end-to-end data protection type settings */ uint8_t dps; /** Namespace Multi-path I/O and Namespace Sharing Capabilities */ uint8_t nmic; /** Reservation Capabilities */ uint8_t rescap; /** Format Progress Indicator */ uint8_t fpi; /** Deallocate Logical Block Features */ uint8_t dlfeat; /** Namespace Atomic Write Unit Normal */ uint16_t nawun; /** Namespace Atomic Write Unit Power Fail */ uint16_t nawupf; /** Namespace Atomic Compare & Write Unit */ uint16_t nacwu; /** Namespace Atomic Boundary Size Normal */ uint16_t nabsn; /** Namespace Atomic Boundary Offset */ uint16_t nabo; /** Namespace Atomic Boundary Size Power Fail */ uint16_t nabspf; /** Namespace Optimal IO Boundary */ uint16_t noiob; /** NVM Capacity */ uint8_t nvmcap[16]; /** Namespace Preferred Write Granularity */ uint16_t npwg; /** Namespace Preferred Write Alignment */ uint16_t npwa; /** Namespace Preferred Deallocate Granularity */ uint16_t npdg; /** Namespace Preferred Deallocate Alignment */ uint16_t npda; /** Namespace Optimal Write Size */ uint16_t nows; /* bytes 74-91: Reserved */ uint8_t reserved5[18]; /** ANA Group Identifier */ uint32_t anagrpid; /* bytes 96-98: Reserved */ uint8_t reserved6[3]; /** Namespace Attributes */ uint8_t nsattr; /** NVM Set Identifier */ uint16_t nvmsetid; /** Endurance Group Identifier */ uint16_t endgid; /** Namespace Globally Unique Identifier */ uint8_t nguid[16]; /** IEEE Extended Unique Identifier */ uint8_t eui64[8]; /** lba format support */ uint32_t lbaf[16]; uint8_t reserved7[192]; uint8_t vendor_specific[3712]; } __packed __aligned(4); _Static_assert(sizeof(struct nvme_namespace_data) == 4096, "bad size for nvme_namepsace_data"); enum nvme_log_page { /* 0x00 - reserved */ NVME_LOG_ERROR = 0x01, NVME_LOG_HEALTH_INFORMATION = 0x02, NVME_LOG_FIRMWARE_SLOT = 0x03, NVME_LOG_CHANGED_NAMESPACE = 0x04, NVME_LOG_COMMAND_EFFECT = 0x05, /* 0x06-0x7F - reserved */ /* 0x80-0xBF - I/O command set specific */ NVME_LOG_RES_NOTIFICATION = 0x80, /* 0xC0-0xFF - vendor specific */ /* * The following are Intel Specific log pages, but they seem * to be widely implemented. */ INTEL_LOG_READ_LAT_LOG = 0xc1, INTEL_LOG_WRITE_LAT_LOG = 0xc2, INTEL_LOG_TEMP_STATS = 0xc5, INTEL_LOG_ADD_SMART = 0xca, INTEL_LOG_DRIVE_MKT_NAME = 0xdd, /* * HGST log page, with lots ofs sub pages. */ HGST_INFO_LOG = 0xc1, }; struct nvme_error_information_entry { uint64_t error_count; uint16_t sqid; uint16_t cid; uint16_t status; uint16_t error_location; uint64_t lba; uint32_t nsid; uint8_t vendor_specific; uint8_t reserved[35]; } __packed __aligned(4); _Static_assert(sizeof(struct nvme_error_information_entry) == 64, "bad size for nvme_error_information_entry"); struct nvme_health_information_page { uint8_t critical_warning; uint16_t temperature; uint8_t available_spare; uint8_t available_spare_threshold; uint8_t percentage_used; uint8_t reserved[26]; /* * Note that the following are 128-bit values, but are * defined as an array of 2 64-bit values. */ /* Data Units Read is always in 512-byte units. */ uint64_t data_units_read[2]; /* Data Units Written is always in 512-byte units. */ uint64_t data_units_written[2]; /* For NVM command set, this includes Compare commands. */ uint64_t host_read_commands[2]; uint64_t host_write_commands[2]; /* Controller Busy Time is reported in minutes. */ uint64_t controller_busy_time[2]; uint64_t power_cycles[2]; uint64_t power_on_hours[2]; uint64_t unsafe_shutdowns[2]; uint64_t media_errors[2]; uint64_t num_error_info_log_entries[2]; uint32_t warning_temp_time; uint32_t error_temp_time; uint16_t temp_sensor[8]; uint8_t reserved2[296]; } __packed __aligned(4); _Static_assert(sizeof(struct nvme_health_information_page) == 512, "bad size for nvme_health_information_page"); struct nvme_firmware_page { uint8_t afi; uint8_t reserved[7]; uint64_t revision[7]; /* revisions for 7 slots */ uint8_t reserved2[448]; } __packed __aligned(4); _Static_assert(sizeof(struct nvme_firmware_page) == 512, "bad size for nvme_firmware_page"); struct nvme_ns_list { uint32_t ns[1024]; } __packed __aligned(4); _Static_assert(sizeof(struct nvme_ns_list) == 4096, "bad size for nvme_ns_list"); struct intel_log_temp_stats { uint64_t current; uint64_t overtemp_flag_last; uint64_t overtemp_flag_life; uint64_t max_temp; uint64_t min_temp; uint64_t _rsvd[5]; uint64_t max_oper_temp; uint64_t min_oper_temp; uint64_t est_offset; } __packed __aligned(4); _Static_assert(sizeof(struct intel_log_temp_stats) == 13 * 8, "bad size for intel_log_temp_stats"); #define NVME_TEST_MAX_THREADS 128 struct nvme_io_test { enum nvme_nvm_opcode opc; uint32_t size; uint32_t time; /* in seconds */ uint32_t num_threads; uint32_t flags; uint64_t io_completed[NVME_TEST_MAX_THREADS]; }; enum nvme_io_test_flags { /* * Specifies whether dev_refthread/dev_relthread should be * called during NVME_BIO_TEST. Ignored for other test * types. */ NVME_TEST_FLAG_REFTHREAD = 0x1, }; struct nvme_pt_command { /* * cmd is used to specify a passthrough command to a controller or * namespace. * * The following fields from cmd may be specified by the caller: * * opc (opcode) * * nsid (namespace id) - for admin commands only * * cdw10-cdw15 * * Remaining fields must be set to 0 by the caller. */ struct nvme_command cmd; /* * cpl returns completion status for the passthrough command * specified by cmd. * * The following fields will be filled out by the driver, for * consumption by the caller: * * cdw0 * * status (except for phase) * * Remaining fields will be set to 0 by the driver. */ struct nvme_completion cpl; /* buf is the data buffer associated with this passthrough command. */ void * buf; /* * len is the length of the data buffer associated with this * passthrough command. */ uint32_t len; /* * is_read = 1 if the passthrough command will read data into the * supplied buffer from the controller. * * is_read = 0 if the passthrough command will write data from the * supplied buffer to the controller. */ uint32_t is_read; /* * driver_lock is used by the driver only. It must be set to 0 * by the caller. */ struct mtx * driver_lock; }; +struct nvme_get_nsid { + char cdev[SPECNAMELEN + 1]; + uint32_t nsid; +}; + #define nvme_completion_is_error(cpl) \ (NVME_STATUS_GET_SC((cpl)->status) != 0 || NVME_STATUS_GET_SCT((cpl)->status) != 0) void nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen); #ifdef _KERNEL struct bio; +struct thread; struct nvme_namespace; struct nvme_controller; struct nvme_consumer; typedef void (*nvme_cb_fn_t)(void *, const struct nvme_completion *); typedef void *(*nvme_cons_ns_fn_t)(struct nvme_namespace *, void *); typedef void *(*nvme_cons_ctrlr_fn_t)(struct nvme_controller *); typedef void (*nvme_cons_async_fn_t)(void *, const struct nvme_completion *, uint32_t, void *, uint32_t); typedef void (*nvme_cons_fail_fn_t)(void *); enum nvme_namespace_flags { NVME_NS_DEALLOCATE_SUPPORTED = 0x1, NVME_NS_FLUSH_SUPPORTED = 0x2, }; int nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr, struct nvme_pt_command *pt, uint32_t nsid, int is_user_buffer, int is_admin_cmd); /* Admin functions */ void nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr, uint8_t feature, uint32_t cdw11, void *payload, uint32_t payload_size, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr, uint8_t feature, uint32_t cdw11, void *payload, uint32_t payload_size, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr, uint8_t log_page, uint32_t nsid, void *payload, uint32_t payload_size, nvme_cb_fn_t cb_fn, void *cb_arg); /* NVM I/O functions */ int nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload, uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn, void *cb_arg); int nvme_ns_cmd_write_bio(struct nvme_namespace *ns, struct bio *bp, nvme_cb_fn_t cb_fn, void *cb_arg); int nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload, uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn, void *cb_arg); int nvme_ns_cmd_read_bio(struct nvme_namespace *ns, struct bio *bp, nvme_cb_fn_t cb_fn, void *cb_arg); int nvme_ns_cmd_deallocate(struct nvme_namespace *ns, void *payload, uint8_t num_ranges, nvme_cb_fn_t cb_fn, void *cb_arg); int nvme_ns_cmd_flush(struct nvme_namespace *ns, nvme_cb_fn_t cb_fn, void *cb_arg); int nvme_ns_dump(struct nvme_namespace *ns, void *virt, off_t offset, size_t len); /* Registration functions */ struct nvme_consumer * nvme_register_consumer(nvme_cons_ns_fn_t ns_fn, nvme_cons_ctrlr_fn_t ctrlr_fn, nvme_cons_async_fn_t async_fn, nvme_cons_fail_fn_t fail_fn); void nvme_unregister_consumer(struct nvme_consumer *consumer); /* Controller helper functions */ device_t nvme_ctrlr_get_device(struct nvme_controller *ctrlr); const struct nvme_controller_data * nvme_ctrlr_get_data(struct nvme_controller *ctrlr); static inline bool nvme_ctrlr_has_dataset_mgmt(const struct nvme_controller_data *cd) { /* Assumes cd was byte swapped by nvme_controller_data_swapbytes() */ return ((cd->oncs >> NVME_CTRLR_DATA_ONCS_DSM_SHIFT) & NVME_CTRLR_DATA_ONCS_DSM_MASK); } /* Namespace helper functions */ uint32_t nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns); uint32_t nvme_ns_get_sector_size(struct nvme_namespace *ns); uint64_t nvme_ns_get_num_sectors(struct nvme_namespace *ns); uint64_t nvme_ns_get_size(struct nvme_namespace *ns); uint32_t nvme_ns_get_flags(struct nvme_namespace *ns); const char * nvme_ns_get_serial_number(struct nvme_namespace *ns); const char * nvme_ns_get_model_number(struct nvme_namespace *ns); const struct nvme_namespace_data * nvme_ns_get_data(struct nvme_namespace *ns); uint32_t nvme_ns_get_stripesize(struct nvme_namespace *ns); int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp, nvme_cb_fn_t cb_fn); +int nvme_ns_ioctl_process(struct nvme_namespace *ns, u_long cmd, + caddr_t arg, int flag, struct thread *td); /* * Command building helper functions -- shared with CAM * These functions assume allocator zeros out cmd structure * CAM's xpt_get_ccb and the request allocator for nvme both * do zero'd allocations. */ static inline void nvme_ns_flush_cmd(struct nvme_command *cmd, uint32_t nsid) { cmd->opc = NVME_OPC_FLUSH; cmd->nsid = htole32(nsid); } static inline void nvme_ns_rw_cmd(struct nvme_command *cmd, uint32_t rwcmd, uint32_t nsid, uint64_t lba, uint32_t count) { cmd->opc = rwcmd; cmd->nsid = htole32(nsid); cmd->cdw10 = htole32(lba & 0xffffffffu); cmd->cdw11 = htole32(lba >> 32); cmd->cdw12 = htole32(count-1); } static inline void nvme_ns_write_cmd(struct nvme_command *cmd, uint32_t nsid, uint64_t lba, uint32_t count) { nvme_ns_rw_cmd(cmd, NVME_OPC_WRITE, nsid, lba, count); } static inline void nvme_ns_read_cmd(struct nvme_command *cmd, uint32_t nsid, uint64_t lba, uint32_t count) { nvme_ns_rw_cmd(cmd, NVME_OPC_READ, nsid, lba, count); } static inline void nvme_ns_trim_cmd(struct nvme_command *cmd, uint32_t nsid, uint32_t num_ranges) { cmd->opc = NVME_OPC_DATASET_MANAGEMENT; cmd->nsid = htole32(nsid); cmd->cdw10 = htole32(num_ranges - 1); cmd->cdw11 = htole32(NVME_DSM_ATTR_DEALLOCATE); } extern int nvme_use_nvd; #endif /* _KERNEL */ /* Endianess conversion functions for NVMe structs */ static inline void nvme_completion_swapbytes(struct nvme_completion *s) { s->cdw0 = le32toh(s->cdw0); /* omit rsvd1 */ s->sqhd = le16toh(s->sqhd); s->sqid = le16toh(s->sqid); /* omit cid */ s->status = le16toh(s->status); } static inline void nvme_power_state_swapbytes(struct nvme_power_state *s) { s->mp = le16toh(s->mp); s->enlat = le32toh(s->enlat); s->exlat = le32toh(s->exlat); s->idlp = le16toh(s->idlp); s->actp = le16toh(s->actp); } static inline void nvme_controller_data_swapbytes(struct nvme_controller_data *s) { int i; s->vid = le16toh(s->vid); s->ssvid = le16toh(s->ssvid); s->ctrlr_id = le16toh(s->ctrlr_id); s->ver = le32toh(s->ver); s->rtd3r = le32toh(s->rtd3r); s->rtd3e = le32toh(s->rtd3e); s->oaes = le32toh(s->oaes); s->ctratt = le32toh(s->ctratt); s->rrls = le16toh(s->rrls); s->crdt1 = le16toh(s->crdt1); s->crdt2 = le16toh(s->crdt2); s->crdt3 = le16toh(s->crdt3); s->oacs = le16toh(s->oacs); s->wctemp = le16toh(s->wctemp); s->cctemp = le16toh(s->cctemp); s->mtfa = le16toh(s->mtfa); s->hmpre = le32toh(s->hmpre); s->hmmin = le32toh(s->hmmin); s->rpmbs = le32toh(s->rpmbs); s->edstt = le16toh(s->edstt); s->kas = le16toh(s->kas); s->hctma = le16toh(s->hctma); s->mntmt = le16toh(s->mntmt); s->mxtmt = le16toh(s->mxtmt); s->sanicap = le32toh(s->sanicap); s->hmminds = le32toh(s->hmminds); s->hmmaxd = le16toh(s->hmmaxd); s->nsetidmax = le16toh(s->nsetidmax); s->endgidmax = le16toh(s->endgidmax); s->anagrpmax = le32toh(s->anagrpmax); s->nanagrpid = le32toh(s->nanagrpid); s->pels = le32toh(s->pels); s->maxcmd = le16toh(s->maxcmd); s->nn = le32toh(s->nn); s->oncs = le16toh(s->oncs); s->fuses = le16toh(s->fuses); s->awun = le16toh(s->awun); s->awupf = le16toh(s->awupf); s->acwu = le16toh(s->acwu); s->sgls = le32toh(s->sgls); s->mnan = le32toh(s->mnan); for (i = 0; i < 32; i++) nvme_power_state_swapbytes(&s->power_state[i]); } static inline void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s) { int i; s->nsze = le64toh(s->nsze); s->ncap = le64toh(s->ncap); s->nuse = le64toh(s->nuse); s->nawun = le16toh(s->nawun); s->nawupf = le16toh(s->nawupf); s->nacwu = le16toh(s->nacwu); s->nabsn = le16toh(s->nabsn); s->nabo = le16toh(s->nabo); s->nabspf = le16toh(s->nabspf); s->noiob = le16toh(s->noiob); s->npwg = le16toh(s->npwg); s->npwa = le16toh(s->npwa); s->npdg = le16toh(s->npdg); s->npda = le16toh(s->npda); s->nows = le16toh(s->nows); s->anagrpid = le32toh(s->anagrpid); s->nvmsetid = le16toh(s->nvmsetid); s->endgid = le16toh(s->endgid); for (i = 0; i < 16; i++) s->lbaf[i] = le32toh(s->lbaf[i]); } static inline void nvme_error_information_entry_swapbytes(struct nvme_error_information_entry *s) { s->error_count = le64toh(s->error_count); s->sqid = le16toh(s->sqid); s->cid = le16toh(s->cid); s->status = le16toh(s->status); s->error_location = le16toh(s->error_location); s->lba = le64toh(s->lba); s->nsid = le32toh(s->nsid); } static inline void nvme_le128toh(void *p) { #if _BYTE_ORDER != _LITTLE_ENDIAN /* Swap 16 bytes in place */ char *tmp = (char*)p; char b; int i; for (i = 0; i < 8; i++) { b = tmp[i]; tmp[i] = tmp[15-i]; tmp[15-i] = b; } #else (void)p; #endif } static inline void nvme_health_information_page_swapbytes(struct nvme_health_information_page *s) { int i; s->temperature = le16toh(s->temperature); nvme_le128toh((void *)s->data_units_read); nvme_le128toh((void *)s->data_units_written); nvme_le128toh((void *)s->host_read_commands); nvme_le128toh((void *)s->host_write_commands); nvme_le128toh((void *)s->controller_busy_time); nvme_le128toh((void *)s->power_cycles); nvme_le128toh((void *)s->power_on_hours); nvme_le128toh((void *)s->unsafe_shutdowns); nvme_le128toh((void *)s->media_errors); nvme_le128toh((void *)s->num_error_info_log_entries); s->warning_temp_time = le32toh(s->warning_temp_time); s->error_temp_time = le32toh(s->error_temp_time); for (i = 0; i < 8; i++) s->temp_sensor[i] = le16toh(s->temp_sensor[i]); } static inline void nvme_firmware_page_swapbytes(struct nvme_firmware_page *s) { int i; for (i = 0; i < 7; i++) s->revision[i] = le64toh(s->revision[i]); } static inline void nvme_ns_list_swapbytes(struct nvme_ns_list *s) { int i; for (i = 0; i < 1024; i++) s->ns[i] = le32toh(s->ns[i]); } static inline void intel_log_temp_stats_swapbytes(struct intel_log_temp_stats *s) { s->current = le64toh(s->current); s->overtemp_flag_last = le64toh(s->overtemp_flag_last); s->overtemp_flag_life = le64toh(s->overtemp_flag_life); s->max_temp = le64toh(s->max_temp); s->min_temp = le64toh(s->min_temp); /* omit _rsvd[] */ s->max_oper_temp = le64toh(s->max_oper_temp); s->min_oper_temp = le64toh(s->min_oper_temp); s->est_offset = le64toh(s->est_offset); } #endif /* __NVME_H__ */ Index: stable/12/sys/dev/nvme/nvme_ctrlr.c =================================================================== --- stable/12/sys/dev/nvme/nvme_ctrlr.c (revision 350945) +++ stable/12/sys/dev/nvme/nvme_ctrlr.c (revision 350946) @@ -1,1418 +1,1426 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2012-2016 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cam.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "nvme_private.h" #define B4_CHK_RDY_DELAY_MS 2300 /* work around controller bug */ static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr, struct nvme_async_event_request *aer); static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr); static int nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr) { ctrlr->resource_id = PCIR_BAR(0); ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY, &ctrlr->resource_id, RF_ACTIVE); if(ctrlr->resource == NULL) { nvme_printf(ctrlr, "unable to allocate pci resource\n"); return (ENOMEM); } ctrlr->bus_tag = rman_get_bustag(ctrlr->resource); ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource); ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle; /* * The NVMe spec allows for the MSI-X table to be placed behind * BAR 4/5, separate from the control/doorbell registers. Always * try to map this bar, because it must be mapped prior to calling * pci_alloc_msix(). If the table isn't behind BAR 4/5, * bus_alloc_resource() will just return NULL which is OK. */ ctrlr->bar4_resource_id = PCIR_BAR(4); ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY, &ctrlr->bar4_resource_id, RF_ACTIVE); return (0); } static int nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr) { struct nvme_qpair *qpair; uint32_t num_entries; int error; qpair = &ctrlr->adminq; num_entries = NVME_ADMIN_ENTRIES; TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries); /* * If admin_entries was overridden to an invalid value, revert it * back to our default value. */ if (num_entries < NVME_MIN_ADMIN_ENTRIES || num_entries > NVME_MAX_ADMIN_ENTRIES) { nvme_printf(ctrlr, "invalid hw.nvme.admin_entries=%d " "specified\n", num_entries); num_entries = NVME_ADMIN_ENTRIES; } /* * The admin queue's max xfer size is treated differently than the * max I/O xfer size. 16KB is sufficient here - maybe even less? */ error = nvme_qpair_construct(qpair, 0, /* qpair ID */ 0, /* vector */ num_entries, NVME_ADMIN_TRACKERS, ctrlr); return (error); } static int nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr) { struct nvme_qpair *qpair; uint32_t cap_lo; uint16_t mqes; int i, error, num_entries, num_trackers; num_entries = NVME_IO_ENTRIES; TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries); /* * NVMe spec sets a hard limit of 64K max entries, but * devices may specify a smaller limit, so we need to check * the MQES field in the capabilities register. */ cap_lo = nvme_mmio_read_4(ctrlr, cap_lo); mqes = NVME_CAP_LO_MQES(cap_lo); num_entries = min(num_entries, mqes + 1); num_trackers = NVME_IO_TRACKERS; TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers); num_trackers = max(num_trackers, NVME_MIN_IO_TRACKERS); num_trackers = min(num_trackers, NVME_MAX_IO_TRACKERS); /* * No need to have more trackers than entries in the submit queue. * Note also that for a queue size of N, we can only have (N-1) * commands outstanding, hence the "-1" here. */ num_trackers = min(num_trackers, (num_entries-1)); /* * Our best estimate for the maximum number of I/Os that we should * noramlly have in flight at one time. This should be viewed as a hint, * not a hard limit and will need to be revisitted when the upper layers * of the storage system grows multi-queue support. */ ctrlr->max_hw_pend_io = num_trackers * ctrlr->num_io_queues * 3 / 4; /* * This was calculated previously when setting up interrupts, but * a controller could theoretically support fewer I/O queues than * MSI-X vectors. So calculate again here just to be safe. */ ctrlr->num_cpus_per_ioq = howmany(mp_ncpus, ctrlr->num_io_queues); ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair), M_NVME, M_ZERO | M_WAITOK); for (i = 0; i < ctrlr->num_io_queues; i++) { qpair = &ctrlr->ioq[i]; /* * Admin queue has ID=0. IO queues start at ID=1 - * hence the 'i+1' here. * * For I/O queues, use the controller-wide max_xfer_size * calculated in nvme_attach(). */ error = nvme_qpair_construct(qpair, i+1, /* qpair ID */ ctrlr->msix_enabled ? i+1 : 0, /* vector */ num_entries, num_trackers, ctrlr); if (error) return (error); /* * Do not bother binding interrupts if we only have one I/O * interrupt thread for this controller. */ if (ctrlr->num_io_queues > 1) bus_bind_intr(ctrlr->dev, qpair->res, i * ctrlr->num_cpus_per_ioq); } return (0); } static void nvme_ctrlr_fail(struct nvme_controller *ctrlr) { int i; ctrlr->is_failed = TRUE; nvme_qpair_fail(&ctrlr->adminq); if (ctrlr->ioq != NULL) { for (i = 0; i < ctrlr->num_io_queues; i++) nvme_qpair_fail(&ctrlr->ioq[i]); } nvme_notify_fail_consumers(ctrlr); } void nvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr, struct nvme_request *req) { mtx_lock(&ctrlr->lock); STAILQ_INSERT_TAIL(&ctrlr->fail_req, req, stailq); mtx_unlock(&ctrlr->lock); taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->fail_req_task); } static void nvme_ctrlr_fail_req_task(void *arg, int pending) { struct nvme_controller *ctrlr = arg; struct nvme_request *req; mtx_lock(&ctrlr->lock); while ((req = STAILQ_FIRST(&ctrlr->fail_req)) != NULL) { STAILQ_REMOVE_HEAD(&ctrlr->fail_req, stailq); mtx_unlock(&ctrlr->lock); nvme_qpair_manual_complete_request(req->qpair, req, NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST); mtx_lock(&ctrlr->lock); } mtx_unlock(&ctrlr->lock); } static int nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr, int desired_val) { int ms_waited; uint32_t csts; csts = nvme_mmio_read_4(ctrlr, csts); ms_waited = 0; while (((csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK) != desired_val) { if (ms_waited++ > ctrlr->ready_timeout_in_ms) { nvme_printf(ctrlr, "controller ready did not become %d " "within %d ms\n", desired_val, ctrlr->ready_timeout_in_ms); return (ENXIO); } DELAY(1000); csts = nvme_mmio_read_4(ctrlr, csts); } return (0); } static int nvme_ctrlr_disable(struct nvme_controller *ctrlr) { uint32_t cc; uint32_t csts; uint8_t en, rdy; int err; cc = nvme_mmio_read_4(ctrlr, cc); csts = nvme_mmio_read_4(ctrlr, csts); en = (cc >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK; rdy = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK; /* * Per 3.1.5 in NVME 1.3 spec, transitioning CC.EN from 0 to 1 * when CSTS.RDY is 1 or transitioning CC.EN from 1 to 0 when * CSTS.RDY is 0 "has undefined results" So make sure that CSTS.RDY * isn't the desired value. Short circuit if we're already disabled. */ if (en == 1) { if (rdy == 0) { /* EN == 1, wait for RDY == 1 or fail */ err = nvme_ctrlr_wait_for_ready(ctrlr, 1); if (err != 0) return (err); } } else { /* EN == 0 already wait for RDY == 0 */ if (rdy == 0) return (0); else return (nvme_ctrlr_wait_for_ready(ctrlr, 0)); } cc &= ~NVME_CC_REG_EN_MASK; nvme_mmio_write_4(ctrlr, cc, cc); /* * Some drives have issues with accessing the mmio after we * disable, so delay for a bit after we write the bit to * cope with these issues. */ if (ctrlr->quirks & QUIRK_DELAY_B4_CHK_RDY) pause("nvmeR", B4_CHK_RDY_DELAY_MS * hz / 1000); return (nvme_ctrlr_wait_for_ready(ctrlr, 0)); } static int nvme_ctrlr_enable(struct nvme_controller *ctrlr) { uint32_t cc; uint32_t csts; uint32_t aqa; uint32_t qsize; uint8_t en, rdy; int err; cc = nvme_mmio_read_4(ctrlr, cc); csts = nvme_mmio_read_4(ctrlr, csts); en = (cc >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK; rdy = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK; /* * See note in nvme_ctrlr_disable. Short circuit if we're already enabled. */ if (en == 1) { if (rdy == 1) return (0); else return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); } else { /* EN == 0 already wait for RDY == 0 or fail */ err = nvme_ctrlr_wait_for_ready(ctrlr, 0); if (err != 0) return (err); } nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr); DELAY(5000); nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr); DELAY(5000); /* acqs and asqs are 0-based. */ qsize = ctrlr->adminq.num_entries - 1; aqa = 0; aqa = (qsize & NVME_AQA_REG_ACQS_MASK) << NVME_AQA_REG_ACQS_SHIFT; aqa |= (qsize & NVME_AQA_REG_ASQS_MASK) << NVME_AQA_REG_ASQS_SHIFT; nvme_mmio_write_4(ctrlr, aqa, aqa); DELAY(5000); /* Initialization values for CC */ cc = 0; cc |= 1 << NVME_CC_REG_EN_SHIFT; cc |= 0 << NVME_CC_REG_CSS_SHIFT; cc |= 0 << NVME_CC_REG_AMS_SHIFT; cc |= 0 << NVME_CC_REG_SHN_SHIFT; cc |= 6 << NVME_CC_REG_IOSQES_SHIFT; /* SQ entry size == 64 == 2^6 */ cc |= 4 << NVME_CC_REG_IOCQES_SHIFT; /* CQ entry size == 16 == 2^4 */ /* This evaluates to 0, which is according to spec. */ cc |= (PAGE_SIZE >> 13) << NVME_CC_REG_MPS_SHIFT; nvme_mmio_write_4(ctrlr, cc, cc); return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); } int nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr) { int i, err; nvme_admin_qpair_disable(&ctrlr->adminq); /* * I/O queues are not allocated before the initial HW * reset, so do not try to disable them. Use is_initialized * to determine if this is the initial HW reset. */ if (ctrlr->is_initialized) { for (i = 0; i < ctrlr->num_io_queues; i++) nvme_io_qpair_disable(&ctrlr->ioq[i]); } DELAY(100*1000); err = nvme_ctrlr_disable(ctrlr); if (err != 0) return err; return (nvme_ctrlr_enable(ctrlr)); } void nvme_ctrlr_reset(struct nvme_controller *ctrlr) { int cmpset; cmpset = atomic_cmpset_32(&ctrlr->is_resetting, 0, 1); if (cmpset == 0 || ctrlr->is_failed) /* * Controller is already resetting or has failed. Return * immediately since there is no need to kick off another * reset in these cases. */ return; taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task); } static int nvme_ctrlr_identify(struct nvme_controller *ctrlr) { struct nvme_completion_poll_status status; status.done = 0; nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata, nvme_completion_poll_cb, &status); while (!atomic_load_acq_int(&status.done)) pause("nvme", 1); if (nvme_completion_is_error(&status.cpl)) { nvme_printf(ctrlr, "nvme_identify_controller failed!\n"); return (ENXIO); } /* Convert data to host endian */ nvme_controller_data_swapbytes(&ctrlr->cdata); /* * Use MDTS to ensure our default max_xfer_size doesn't exceed what the * controller supports. */ if (ctrlr->cdata.mdts > 0) ctrlr->max_xfer_size = min(ctrlr->max_xfer_size, ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts))); return (0); } static int nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr) { struct nvme_completion_poll_status status; int cq_allocated, sq_allocated; status.done = 0; nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues, nvme_completion_poll_cb, &status); while (!atomic_load_acq_int(&status.done)) pause("nvme", 1); if (nvme_completion_is_error(&status.cpl)) { nvme_printf(ctrlr, "nvme_ctrlr_set_num_qpairs failed!\n"); return (ENXIO); } /* * Data in cdw0 is 0-based. * Lower 16-bits indicate number of submission queues allocated. * Upper 16-bits indicate number of completion queues allocated. */ sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1; cq_allocated = (status.cpl.cdw0 >> 16) + 1; /* * Controller may allocate more queues than we requested, * so use the minimum of the number requested and what was * actually allocated. */ ctrlr->num_io_queues = min(ctrlr->num_io_queues, sq_allocated); ctrlr->num_io_queues = min(ctrlr->num_io_queues, cq_allocated); return (0); } static int nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr) { struct nvme_completion_poll_status status; struct nvme_qpair *qpair; int i; for (i = 0; i < ctrlr->num_io_queues; i++) { qpair = &ctrlr->ioq[i]; status.done = 0; nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector, nvme_completion_poll_cb, &status); while (!atomic_load_acq_int(&status.done)) pause("nvme", 1); if (nvme_completion_is_error(&status.cpl)) { nvme_printf(ctrlr, "nvme_create_io_cq failed!\n"); return (ENXIO); } status.done = 0; nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_poll_cb, &status); while (!atomic_load_acq_int(&status.done)) pause("nvme", 1); if (nvme_completion_is_error(&status.cpl)) { nvme_printf(ctrlr, "nvme_create_io_sq failed!\n"); return (ENXIO); } } return (0); } static int nvme_ctrlr_destroy_qpairs(struct nvme_controller *ctrlr) { struct nvme_completion_poll_status status; struct nvme_qpair *qpair; for (int i = 0; i < ctrlr->num_io_queues; i++) { qpair = &ctrlr->ioq[i]; status.done = 0; nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, &status); while (!atomic_load_acq_int(&status.done)) pause("nvme", 1); if (nvme_completion_is_error(&status.cpl)) { nvme_printf(ctrlr, "nvme_destroy_io_sq failed!\n"); return (ENXIO); } status.done = 0; nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, &status); while (!atomic_load_acq_int(&status.done)) pause("nvme", 1); if (nvme_completion_is_error(&status.cpl)) { nvme_printf(ctrlr, "nvme_destroy_io_cq failed!\n"); return (ENXIO); } } return (0); } static int nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr) { struct nvme_namespace *ns; uint32_t i; for (i = 0; i < min(ctrlr->cdata.nn, NVME_MAX_NAMESPACES); i++) { ns = &ctrlr->ns[i]; nvme_ns_construct(ns, i+1, ctrlr); } return (0); } static boolean_t is_log_page_id_valid(uint8_t page_id) { switch (page_id) { case NVME_LOG_ERROR: case NVME_LOG_HEALTH_INFORMATION: case NVME_LOG_FIRMWARE_SLOT: case NVME_LOG_CHANGED_NAMESPACE: return (TRUE); } return (FALSE); } static uint32_t nvme_ctrlr_get_log_page_size(struct nvme_controller *ctrlr, uint8_t page_id) { uint32_t log_page_size; switch (page_id) { case NVME_LOG_ERROR: log_page_size = min( sizeof(struct nvme_error_information_entry) * (ctrlr->cdata.elpe + 1), NVME_MAX_AER_LOG_SIZE); break; case NVME_LOG_HEALTH_INFORMATION: log_page_size = sizeof(struct nvme_health_information_page); break; case NVME_LOG_FIRMWARE_SLOT: log_page_size = sizeof(struct nvme_firmware_page); break; case NVME_LOG_CHANGED_NAMESPACE: log_page_size = sizeof(struct nvme_ns_list); break; default: log_page_size = 0; break; } return (log_page_size); } static void nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr, uint8_t state) { if (state & NVME_CRIT_WARN_ST_AVAILABLE_SPARE) nvme_printf(ctrlr, "available spare space below threshold\n"); if (state & NVME_CRIT_WARN_ST_TEMPERATURE) nvme_printf(ctrlr, "temperature above threshold\n"); if (state & NVME_CRIT_WARN_ST_DEVICE_RELIABILITY) nvme_printf(ctrlr, "device reliability degraded\n"); if (state & NVME_CRIT_WARN_ST_READ_ONLY) nvme_printf(ctrlr, "media placed in read only mode\n"); if (state & NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP) nvme_printf(ctrlr, "volatile memory backup device failed\n"); if (state & NVME_CRIT_WARN_ST_RESERVED_MASK) nvme_printf(ctrlr, "unknown critical warning(s): state = 0x%02x\n", state); } static void nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl) { struct nvme_async_event_request *aer = arg; struct nvme_health_information_page *health_info; struct nvme_ns_list *nsl; struct nvme_error_information_entry *err; int i; /* * If the log page fetch for some reason completed with an error, * don't pass log page data to the consumers. In practice, this case * should never happen. */ if (nvme_completion_is_error(cpl)) nvme_notify_async_consumers(aer->ctrlr, &aer->cpl, aer->log_page_id, NULL, 0); else { /* Convert data to host endian */ switch (aer->log_page_id) { case NVME_LOG_ERROR: err = (struct nvme_error_information_entry *)aer->log_page_buffer; for (i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++) nvme_error_information_entry_swapbytes(err++); break; case NVME_LOG_HEALTH_INFORMATION: nvme_health_information_page_swapbytes( (struct nvme_health_information_page *)aer->log_page_buffer); break; case NVME_LOG_FIRMWARE_SLOT: nvme_firmware_page_swapbytes( (struct nvme_firmware_page *)aer->log_page_buffer); break; case NVME_LOG_CHANGED_NAMESPACE: nvme_ns_list_swapbytes( (struct nvme_ns_list *)aer->log_page_buffer); break; case INTEL_LOG_TEMP_STATS: intel_log_temp_stats_swapbytes( (struct intel_log_temp_stats *)aer->log_page_buffer); break; default: break; } if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) { health_info = (struct nvme_health_information_page *) aer->log_page_buffer; nvme_ctrlr_log_critical_warnings(aer->ctrlr, health_info->critical_warning); /* * Critical warnings reported through the * SMART/health log page are persistent, so * clear the associated bits in the async event * config so that we do not receive repeated * notifications for the same event. */ aer->ctrlr->async_event_config &= ~health_info->critical_warning; nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr, aer->ctrlr->async_event_config, NULL, NULL); } else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE && !nvme_use_nvd) { nsl = (struct nvme_ns_list *)aer->log_page_buffer; for (i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) { if (nsl->ns[i] > NVME_MAX_NAMESPACES) break; nvme_notify_ns(aer->ctrlr, nsl->ns[i]); } } /* * Pass the cpl data from the original async event completion, * not the log page fetch. */ nvme_notify_async_consumers(aer->ctrlr, &aer->cpl, aer->log_page_id, aer->log_page_buffer, aer->log_page_size); } /* * Repost another asynchronous event request to replace the one * that just completed. */ nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer); } static void nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl) { struct nvme_async_event_request *aer = arg; if (nvme_completion_is_error(cpl)) { /* * Do not retry failed async event requests. This avoids * infinite loops where a new async event request is submitted * to replace the one just failed, only to fail again and * perpetuate the loop. */ return; } /* Associated log page is in bits 23:16 of completion entry dw0. */ aer->log_page_id = (cpl->cdw0 & 0xFF0000) >> 16; nvme_printf(aer->ctrlr, "async event occurred (type 0x%x, info 0x%02x," " page 0x%02x)\n", (cpl->cdw0 & 0x03), (cpl->cdw0 & 0xFF00) >> 8, aer->log_page_id); if (is_log_page_id_valid(aer->log_page_id)) { aer->log_page_size = nvme_ctrlr_get_log_page_size(aer->ctrlr, aer->log_page_id); memcpy(&aer->cpl, cpl, sizeof(*cpl)); nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id, NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer, aer->log_page_size, nvme_ctrlr_async_event_log_page_cb, aer); /* Wait to notify consumers until after log page is fetched. */ } else { nvme_notify_async_consumers(aer->ctrlr, cpl, aer->log_page_id, NULL, 0); /* * Repost another asynchronous event request to replace the one * that just completed. */ nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer); } } static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr, struct nvme_async_event_request *aer) { struct nvme_request *req; aer->ctrlr = ctrlr; req = nvme_allocate_request_null(nvme_ctrlr_async_event_cb, aer); aer->req = req; /* * Disable timeout here, since asynchronous event requests should by * nature never be timed out. */ req->timeout = FALSE; req->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST; nvme_ctrlr_submit_admin_request(ctrlr, req); } static void nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr) { struct nvme_completion_poll_status status; struct nvme_async_event_request *aer; uint32_t i; ctrlr->async_event_config = NVME_CRIT_WARN_ST_AVAILABLE_SPARE | NVME_CRIT_WARN_ST_DEVICE_RELIABILITY | NVME_CRIT_WARN_ST_READ_ONLY | NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP; if (ctrlr->cdata.ver >= NVME_REV(1, 2)) ctrlr->async_event_config |= 0x300; status.done = 0; nvme_ctrlr_cmd_get_feature(ctrlr, NVME_FEAT_TEMPERATURE_THRESHOLD, 0, NULL, 0, nvme_completion_poll_cb, &status); while (!atomic_load_acq_int(&status.done)) pause("nvme", 1); if (nvme_completion_is_error(&status.cpl) || (status.cpl.cdw0 & 0xFFFF) == 0xFFFF || (status.cpl.cdw0 & 0xFFFF) == 0x0000) { nvme_printf(ctrlr, "temperature threshold not supported\n"); } else ctrlr->async_event_config |= NVME_CRIT_WARN_ST_TEMPERATURE; nvme_ctrlr_cmd_set_async_event_config(ctrlr, ctrlr->async_event_config, NULL, NULL); /* aerl is a zero-based value, so we need to add 1 here. */ ctrlr->num_aers = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1)); for (i = 0; i < ctrlr->num_aers; i++) { aer = &ctrlr->aer[i]; nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); } } static void nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr) { ctrlr->int_coal_time = 0; TUNABLE_INT_FETCH("hw.nvme.int_coal_time", &ctrlr->int_coal_time); ctrlr->int_coal_threshold = 0; TUNABLE_INT_FETCH("hw.nvme.int_coal_threshold", &ctrlr->int_coal_threshold); nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL, NULL); } static void nvme_ctrlr_start(void *ctrlr_arg) { struct nvme_controller *ctrlr = ctrlr_arg; uint32_t old_num_io_queues; int i; /* * Only reset adminq here when we are restarting the * controller after a reset. During initialization, * we have already submitted admin commands to get * the number of I/O queues supported, so cannot reset * the adminq again here. */ if (ctrlr->is_resetting) { nvme_qpair_reset(&ctrlr->adminq); } for (i = 0; i < ctrlr->num_io_queues; i++) nvme_qpair_reset(&ctrlr->ioq[i]); nvme_admin_qpair_enable(&ctrlr->adminq); if (nvme_ctrlr_identify(ctrlr) != 0) { nvme_ctrlr_fail(ctrlr); return; } /* * The number of qpairs are determined during controller initialization, * including using NVMe SET_FEATURES/NUMBER_OF_QUEUES to determine the * HW limit. We call SET_FEATURES again here so that it gets called * after any reset for controllers that depend on the driver to * explicit specify how many queues it will use. This value should * never change between resets, so panic if somehow that does happen. */ if (ctrlr->is_resetting) { old_num_io_queues = ctrlr->num_io_queues; if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) { nvme_ctrlr_fail(ctrlr); return; } if (old_num_io_queues != ctrlr->num_io_queues) { panic("num_io_queues changed from %u to %u", old_num_io_queues, ctrlr->num_io_queues); } } if (nvme_ctrlr_create_qpairs(ctrlr) != 0) { nvme_ctrlr_fail(ctrlr); return; } if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) { nvme_ctrlr_fail(ctrlr); return; } nvme_ctrlr_configure_aer(ctrlr); nvme_ctrlr_configure_int_coalescing(ctrlr); for (i = 0; i < ctrlr->num_io_queues; i++) nvme_io_qpair_enable(&ctrlr->ioq[i]); } void nvme_ctrlr_start_config_hook(void *arg) { struct nvme_controller *ctrlr = arg; nvme_qpair_reset(&ctrlr->adminq); nvme_admin_qpair_enable(&ctrlr->adminq); if (nvme_ctrlr_set_num_qpairs(ctrlr) == 0 && nvme_ctrlr_construct_io_qpairs(ctrlr) == 0) nvme_ctrlr_start(ctrlr); else nvme_ctrlr_fail(ctrlr); nvme_sysctl_initialize_ctrlr(ctrlr); config_intrhook_disestablish(&ctrlr->config_hook); ctrlr->is_initialized = 1; nvme_notify_new_controller(ctrlr); } static void nvme_ctrlr_reset_task(void *arg, int pending) { struct nvme_controller *ctrlr = arg; int status; nvme_printf(ctrlr, "resetting controller\n"); status = nvme_ctrlr_hw_reset(ctrlr); /* * Use pause instead of DELAY, so that we yield to any nvme interrupt * handlers on this CPU that were blocked on a qpair lock. We want * all nvme interrupts completed before proceeding with restarting the * controller. * * XXX - any way to guarantee the interrupt handlers have quiesced? */ pause("nvmereset", hz / 10); if (status == 0) nvme_ctrlr_start(ctrlr); else nvme_ctrlr_fail(ctrlr); atomic_cmpset_32(&ctrlr->is_resetting, 1, 0); } /* * Poll all the queues enabled on the device for completion. */ void nvme_ctrlr_poll(struct nvme_controller *ctrlr) { int i; nvme_qpair_process_completions(&ctrlr->adminq); for (i = 0; i < ctrlr->num_io_queues; i++) if (ctrlr->ioq && ctrlr->ioq[i].cpl) nvme_qpair_process_completions(&ctrlr->ioq[i]); } /* * Poll the single-vector intertrupt case: num_io_queues will be 1 and * there's only a single vector. While we're polling, we mask further * interrupts in the controller. */ void nvme_ctrlr_intx_handler(void *arg) { struct nvme_controller *ctrlr = arg; nvme_mmio_write_4(ctrlr, intms, 1); nvme_ctrlr_poll(ctrlr); nvme_mmio_write_4(ctrlr, intmc, 1); } static int nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr) { ctrlr->msix_enabled = 0; ctrlr->num_io_queues = 1; ctrlr->num_cpus_per_ioq = mp_ncpus; ctrlr->rid = 0; ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ, &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE); if (ctrlr->res == NULL) { nvme_printf(ctrlr, "unable to allocate shared IRQ\n"); return (ENOMEM); } bus_setup_intr(ctrlr->dev, ctrlr->res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler, ctrlr, &ctrlr->tag); if (ctrlr->tag == NULL) { nvme_printf(ctrlr, "unable to setup intx handler\n"); return (ENOMEM); } return (0); } static void nvme_pt_done(void *arg, const struct nvme_completion *cpl) { struct nvme_pt_command *pt = arg; struct mtx *mtx = pt->driver_lock; uint16_t status; bzero(&pt->cpl, sizeof(pt->cpl)); pt->cpl.cdw0 = cpl->cdw0; status = cpl->status; status &= ~NVME_STATUS_P_MASK; pt->cpl.status = status; mtx_lock(mtx); pt->driver_lock = NULL; wakeup(pt); mtx_unlock(mtx); } int nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr, struct nvme_pt_command *pt, uint32_t nsid, int is_user_buffer, int is_admin_cmd) { struct nvme_request *req; struct mtx *mtx; struct buf *buf = NULL; int ret = 0; vm_offset_t addr, end; if (pt->len > 0) { /* * vmapbuf calls vm_fault_quick_hold_pages which only maps full * pages. Ensure this request has fewer than MAXPHYS bytes when * extended to full pages. */ addr = (vm_offset_t)pt->buf; end = round_page(addr + pt->len); addr = trunc_page(addr); if (end - addr > MAXPHYS) return EIO; if (pt->len > ctrlr->max_xfer_size) { nvme_printf(ctrlr, "pt->len (%d) " "exceeds max_xfer_size (%d)\n", pt->len, ctrlr->max_xfer_size); return EIO; } if (is_user_buffer) { /* * Ensure the user buffer is wired for the duration of * this passthrough command. */ PHOLD(curproc); buf = getpbuf(NULL); buf->b_data = pt->buf; buf->b_bufsize = pt->len; buf->b_iocmd = pt->is_read ? BIO_READ : BIO_WRITE; if (vmapbuf(buf, 1) < 0) { ret = EFAULT; goto err; } req = nvme_allocate_request_vaddr(buf->b_data, pt->len, nvme_pt_done, pt); } else req = nvme_allocate_request_vaddr(pt->buf, pt->len, nvme_pt_done, pt); } else req = nvme_allocate_request_null(nvme_pt_done, pt); /* Assume userspace already converted to little-endian */ req->cmd.opc = pt->cmd.opc; req->cmd.fuse = pt->cmd.fuse; req->cmd.rsvd2 = pt->cmd.rsvd2; req->cmd.rsvd3 = pt->cmd.rsvd3; req->cmd.cdw10 = pt->cmd.cdw10; req->cmd.cdw11 = pt->cmd.cdw11; req->cmd.cdw12 = pt->cmd.cdw12; req->cmd.cdw13 = pt->cmd.cdw13; req->cmd.cdw14 = pt->cmd.cdw14; req->cmd.cdw15 = pt->cmd.cdw15; req->cmd.nsid = htole32(nsid); mtx = mtx_pool_find(mtxpool_sleep, pt); pt->driver_lock = mtx; if (is_admin_cmd) nvme_ctrlr_submit_admin_request(ctrlr, req); else nvme_ctrlr_submit_io_request(ctrlr, req); mtx_lock(mtx); while (pt->driver_lock != NULL) mtx_sleep(pt, mtx, PRIBIO, "nvme_pt", 0); mtx_unlock(mtx); err: if (buf != NULL) { relpbuf(buf, NULL); PRELE(curproc); } return (ret); } static int nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, struct thread *td) { struct nvme_controller *ctrlr; struct nvme_pt_command *pt; ctrlr = cdev->si_drv1; switch (cmd) { case NVME_RESET_CONTROLLER: nvme_ctrlr_reset(ctrlr); break; case NVME_PASSTHROUGH_CMD: pt = (struct nvme_pt_command *)arg; return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, le32toh(pt->cmd.nsid), 1 /* is_user_buffer */, 1 /* is_admin_cmd */)); + case NVME_GET_NSID: + { + struct nvme_get_nsid *gnsid = (struct nvme_get_nsid *)arg; + strncpy(gnsid->cdev, device_get_nameunit(ctrlr->dev), + sizeof(gnsid->cdev)); + gnsid->nsid = 0; + break; + } default: return (ENOTTY); } return (0); } static struct cdevsw nvme_ctrlr_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_ioctl = nvme_ctrlr_ioctl }; static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr) { device_t dev; int per_cpu_io_queues; int min_cpus_per_ioq; int num_vectors_requested, num_vectors_allocated; int num_vectors_available; dev = ctrlr->dev; min_cpus_per_ioq = 1; TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq); if (min_cpus_per_ioq < 1) { min_cpus_per_ioq = 1; } else if (min_cpus_per_ioq > mp_ncpus) { min_cpus_per_ioq = mp_ncpus; } per_cpu_io_queues = 1; TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues); if (per_cpu_io_queues == 0) { min_cpus_per_ioq = mp_ncpus; } ctrlr->force_intx = 0; TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx); /* * FreeBSD currently cannot allocate more than about 190 vectors at * boot, meaning that systems with high core count and many devices * requesting per-CPU interrupt vectors will not get their full * allotment. So first, try to allocate as many as we may need to * understand what is available, then immediately release them. * Then figure out how many of those we will actually use, based on * assigning an equal number of cores to each I/O queue. */ /* One vector for per core I/O queue, plus one vector for admin queue. */ num_vectors_available = min(pci_msix_count(dev), mp_ncpus + 1); if (pci_alloc_msix(dev, &num_vectors_available) != 0) { num_vectors_available = 0; } pci_release_msi(dev); if (ctrlr->force_intx || num_vectors_available < 2) { nvme_ctrlr_configure_intx(ctrlr); return; } /* * Do not use all vectors for I/O queues - one must be saved for the * admin queue. */ ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq, howmany(mp_ncpus, num_vectors_available - 1)); ctrlr->num_io_queues = howmany(mp_ncpus, ctrlr->num_cpus_per_ioq); num_vectors_requested = ctrlr->num_io_queues + 1; num_vectors_allocated = num_vectors_requested; /* * Now just allocate the number of vectors we need. This should * succeed, since we previously called pci_alloc_msix() * successfully returning at least this many vectors, but just to * be safe, if something goes wrong just revert to INTx. */ if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) { nvme_ctrlr_configure_intx(ctrlr); return; } if (num_vectors_allocated < num_vectors_requested) { pci_release_msi(dev); nvme_ctrlr_configure_intx(ctrlr); return; } ctrlr->msix_enabled = 1; } int nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) { struct make_dev_args md_args; uint32_t cap_lo; uint32_t cap_hi; uint32_t to; uint8_t dstrd; uint8_t mpsmin; int status, timeout_period; ctrlr->dev = dev; mtx_init(&ctrlr->lock, "nvme ctrlr lock", NULL, MTX_DEF); status = nvme_ctrlr_allocate_bar(ctrlr); if (status != 0) return (status); /* * Software emulators may set the doorbell stride to something * other than zero, but this driver is not set up to handle that. */ cap_hi = nvme_mmio_read_4(ctrlr, cap_hi); dstrd = NVME_CAP_HI_DSTRD(cap_hi); if (dstrd != 0) return (ENXIO); mpsmin = NVME_CAP_HI_MPSMIN(cap_hi); ctrlr->min_page_size = 1 << (12 + mpsmin); /* Get ready timeout value from controller, in units of 500ms. */ cap_lo = nvme_mmio_read_4(ctrlr, cap_lo); to = NVME_CAP_LO_TO(cap_lo) + 1; ctrlr->ready_timeout_in_ms = to * 500; timeout_period = NVME_DEFAULT_TIMEOUT_PERIOD; TUNABLE_INT_FETCH("hw.nvme.timeout_period", &timeout_period); timeout_period = min(timeout_period, NVME_MAX_TIMEOUT_PERIOD); timeout_period = max(timeout_period, NVME_MIN_TIMEOUT_PERIOD); ctrlr->timeout_period = timeout_period; nvme_retry_count = NVME_DEFAULT_RETRY_COUNT; TUNABLE_INT_FETCH("hw.nvme.retry_count", &nvme_retry_count); ctrlr->enable_aborts = 0; TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts); nvme_ctrlr_setup_interrupts(ctrlr); ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE; if (nvme_ctrlr_construct_admin_qpair(ctrlr) != 0) return (ENXIO); ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK, taskqueue_thread_enqueue, &ctrlr->taskqueue); taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq"); ctrlr->is_resetting = 0; ctrlr->is_initialized = 0; ctrlr->notification_sent = 0; TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr); TASK_INIT(&ctrlr->fail_req_task, 0, nvme_ctrlr_fail_req_task, ctrlr); STAILQ_INIT(&ctrlr->fail_req); ctrlr->is_failed = FALSE; make_dev_args_init(&md_args); md_args.mda_devsw = &nvme_ctrlr_cdevsw; md_args.mda_uid = UID_ROOT; md_args.mda_gid = GID_WHEEL; md_args.mda_mode = 0600; md_args.mda_unit = device_get_unit(dev); md_args.mda_si_drv1 = (void *)ctrlr; status = make_dev_s(&md_args, &ctrlr->cdev, "nvme%d", device_get_unit(dev)); if (status != 0) return (ENXIO); return (0); } void nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev) { int i; if (ctrlr->resource == NULL) goto nores; nvme_notify_fail_consumers(ctrlr); for (i = 0; i < NVME_MAX_NAMESPACES; i++) nvme_ns_destruct(&ctrlr->ns[i]); if (ctrlr->cdev) destroy_dev(ctrlr->cdev); nvme_ctrlr_destroy_qpairs(ctrlr); for (i = 0; i < ctrlr->num_io_queues; i++) { nvme_io_qpair_destroy(&ctrlr->ioq[i]); } free(ctrlr->ioq, M_NVME); nvme_admin_qpair_destroy(&ctrlr->adminq); /* * Notify the controller of a shutdown, even though this is due to * a driver unload, not a system shutdown (this path is not invoked * during shutdown). This ensures the controller receives a * shutdown notification in case the system is shutdown before * reloading the driver. */ nvme_ctrlr_shutdown(ctrlr); nvme_ctrlr_disable(ctrlr); if (ctrlr->taskqueue) taskqueue_free(ctrlr->taskqueue); if (ctrlr->tag) bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag); if (ctrlr->res) bus_release_resource(ctrlr->dev, SYS_RES_IRQ, rman_get_rid(ctrlr->res), ctrlr->res); if (ctrlr->msix_enabled) pci_release_msi(dev); if (ctrlr->bar4_resource != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, ctrlr->bar4_resource_id, ctrlr->bar4_resource); } bus_release_resource(dev, SYS_RES_MEMORY, ctrlr->resource_id, ctrlr->resource); nores: mtx_destroy(&ctrlr->lock); } void nvme_ctrlr_shutdown(struct nvme_controller *ctrlr) { uint32_t cc; uint32_t csts; int ticks = 0; cc = nvme_mmio_read_4(ctrlr, cc); cc &= ~(NVME_CC_REG_SHN_MASK << NVME_CC_REG_SHN_SHIFT); cc |= NVME_SHN_NORMAL << NVME_CC_REG_SHN_SHIFT; nvme_mmio_write_4(ctrlr, cc, cc); csts = nvme_mmio_read_4(ctrlr, csts); while ((NVME_CSTS_GET_SHST(csts) != NVME_SHST_COMPLETE) && (ticks++ < 5*hz)) { pause("nvme shn", 1); csts = nvme_mmio_read_4(ctrlr, csts); } if (NVME_CSTS_GET_SHST(csts) != NVME_SHST_COMPLETE) nvme_printf(ctrlr, "did not complete shutdown within 5 seconds " "of notification\n"); } void nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr, struct nvme_request *req) { nvme_qpair_submit_request(&ctrlr->adminq, req); } void nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr, struct nvme_request *req) { struct nvme_qpair *qpair; qpair = &ctrlr->ioq[curcpu / ctrlr->num_cpus_per_ioq]; nvme_qpair_submit_request(qpair, req); } device_t nvme_ctrlr_get_device(struct nvme_controller *ctrlr) { return (ctrlr->dev); } const struct nvme_controller_data * nvme_ctrlr_get_data(struct nvme_controller *ctrlr) { return (&ctrlr->cdata); } Index: stable/12/sys/dev/nvme/nvme_ns.c =================================================================== --- stable/12/sys/dev/nvme/nvme_ns.c (revision 350945) +++ stable/12/sys/dev/nvme/nvme_ns.c (revision 350946) @@ -1,605 +1,620 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2012-2013 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include "nvme_private.h" static void nvme_bio_child_inbed(struct bio *parent, int bio_error); static void nvme_bio_child_done(void *arg, const struct nvme_completion *cpl); static uint32_t nvme_get_num_segments(uint64_t addr, uint64_t size, uint32_t alignment); static void nvme_free_child_bios(int num_bios, struct bio **child_bios); static struct bio ** nvme_allocate_child_bios(int num_bios); static struct bio ** nvme_construct_child_bios(struct bio *bp, uint32_t alignment, int *num_bios); static int nvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp, uint32_t alignment); static int nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, struct thread *td) { struct nvme_namespace *ns; struct nvme_controller *ctrlr; struct nvme_pt_command *pt; ns = cdev->si_drv1; ctrlr = ns->ctrlr; switch (cmd) { case NVME_IO_TEST: case NVME_BIO_TEST: nvme_ns_test(ns, cmd, arg); break; case NVME_PASSTHROUGH_CMD: pt = (struct nvme_pt_command *)arg; return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, ns->id, 1 /* is_user_buffer */, 0 /* is_admin_cmd */)); + case NVME_GET_NSID: + { + struct nvme_get_nsid *gnsid = (struct nvme_get_nsid *)arg; + strncpy(gnsid->cdev, device_get_nameunit(ctrlr->dev), + sizeof(gnsid->cdev)); + gnsid->nsid = ns->id; + break; + } case DIOCGMEDIASIZE: *(off_t *)arg = (off_t)nvme_ns_get_size(ns); break; case DIOCGSECTORSIZE: *(u_int *)arg = nvme_ns_get_sector_size(ns); break; default: return (ENOTTY); } return (0); } static int nvme_ns_open(struct cdev *dev __unused, int flags, int fmt __unused, struct thread *td) { int error = 0; if (flags & FWRITE) error = securelevel_gt(td->td_ucred, 0); return (error); } static int nvme_ns_close(struct cdev *dev __unused, int flags, int fmt __unused, struct thread *td) { return (0); } static void nvme_ns_strategy_done(void *arg, const struct nvme_completion *cpl) { struct bio *bp = arg; /* * TODO: add more extensive translation of NVMe status codes * to different bio error codes (i.e. EIO, EINVAL, etc.) */ if (nvme_completion_is_error(cpl)) { bp->bio_error = EIO; bp->bio_flags |= BIO_ERROR; bp->bio_resid = bp->bio_bcount; } else bp->bio_resid = 0; biodone(bp); } static void nvme_ns_strategy(struct bio *bp) { struct nvme_namespace *ns; int err; ns = bp->bio_dev->si_drv1; err = nvme_ns_bio_process(ns, bp, nvme_ns_strategy_done); if (err) { bp->bio_error = err; bp->bio_flags |= BIO_ERROR; bp->bio_resid = bp->bio_bcount; biodone(bp); } } static struct cdevsw nvme_ns_cdevsw = { .d_version = D_VERSION, .d_flags = D_DISK, .d_read = physread, .d_write = physwrite, .d_open = nvme_ns_open, .d_close = nvme_ns_close, .d_strategy = nvme_ns_strategy, .d_ioctl = nvme_ns_ioctl }; uint32_t nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns) { return ns->ctrlr->max_xfer_size; } uint32_t nvme_ns_get_sector_size(struct nvme_namespace *ns) { uint8_t flbas_fmt, lbads; flbas_fmt = (ns->data.flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & NVME_NS_DATA_FLBAS_FORMAT_MASK; lbads = (ns->data.lbaf[flbas_fmt] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) & NVME_NS_DATA_LBAF_LBADS_MASK; return (1 << lbads); } uint64_t nvme_ns_get_num_sectors(struct nvme_namespace *ns) { return (ns->data.nsze); } uint64_t nvme_ns_get_size(struct nvme_namespace *ns) { return (nvme_ns_get_num_sectors(ns) * nvme_ns_get_sector_size(ns)); } uint32_t nvme_ns_get_flags(struct nvme_namespace *ns) { return (ns->flags); } const char * nvme_ns_get_serial_number(struct nvme_namespace *ns) { return ((const char *)ns->ctrlr->cdata.sn); } const char * nvme_ns_get_model_number(struct nvme_namespace *ns) { return ((const char *)ns->ctrlr->cdata.mn); } const struct nvme_namespace_data * nvme_ns_get_data(struct nvme_namespace *ns) { return (&ns->data); } uint32_t nvme_ns_get_stripesize(struct nvme_namespace *ns) { return (ns->stripesize); } static void nvme_ns_bio_done(void *arg, const struct nvme_completion *status) { struct bio *bp = arg; nvme_cb_fn_t bp_cb_fn; bp_cb_fn = bp->bio_driver1; if (bp->bio_driver2) free(bp->bio_driver2, M_NVME); if (nvme_completion_is_error(status)) { bp->bio_flags |= BIO_ERROR; if (bp->bio_error == 0) bp->bio_error = EIO; } if ((bp->bio_flags & BIO_ERROR) == 0) bp->bio_resid = 0; else bp->bio_resid = bp->bio_bcount; bp_cb_fn(bp, status); } static void nvme_bio_child_inbed(struct bio *parent, int bio_error) { struct nvme_completion parent_cpl; int children, inbed; if (bio_error != 0) { parent->bio_flags |= BIO_ERROR; parent->bio_error = bio_error; } /* * atomic_fetchadd will return value before adding 1, so we still * must add 1 to get the updated inbed number. Save bio_children * before incrementing to guard against race conditions when * two children bios complete on different queues. */ children = atomic_load_acq_int(&parent->bio_children); inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1; if (inbed == children) { bzero(&parent_cpl, sizeof(parent_cpl)); if (parent->bio_flags & BIO_ERROR) { parent_cpl.status &= ~(NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT); parent_cpl.status |= (NVME_SC_DATA_TRANSFER_ERROR) << NVME_STATUS_SC_SHIFT; } nvme_ns_bio_done(parent, &parent_cpl); } } static void nvme_bio_child_done(void *arg, const struct nvme_completion *cpl) { struct bio *child = arg; struct bio *parent; int bio_error; parent = child->bio_parent; g_destroy_bio(child); bio_error = nvme_completion_is_error(cpl) ? EIO : 0; nvme_bio_child_inbed(parent, bio_error); } static uint32_t nvme_get_num_segments(uint64_t addr, uint64_t size, uint32_t align) { uint32_t num_segs, offset, remainder; if (align == 0) return (1); KASSERT((align & (align - 1)) == 0, ("alignment not power of 2\n")); num_segs = size / align; remainder = size & (align - 1); offset = addr & (align - 1); if (remainder > 0 || offset > 0) num_segs += 1 + (remainder + offset - 1) / align; return (num_segs); } static void nvme_free_child_bios(int num_bios, struct bio **child_bios) { int i; for (i = 0; i < num_bios; i++) { if (child_bios[i] != NULL) g_destroy_bio(child_bios[i]); } free(child_bios, M_NVME); } static struct bio ** nvme_allocate_child_bios(int num_bios) { struct bio **child_bios; int err = 0, i; child_bios = malloc(num_bios * sizeof(struct bio *), M_NVME, M_NOWAIT); if (child_bios == NULL) return (NULL); for (i = 0; i < num_bios; i++) { child_bios[i] = g_new_bio(); if (child_bios[i] == NULL) err = ENOMEM; } if (err == ENOMEM) { nvme_free_child_bios(num_bios, child_bios); return (NULL); } return (child_bios); } static struct bio ** nvme_construct_child_bios(struct bio *bp, uint32_t alignment, int *num_bios) { struct bio **child_bios; struct bio *child; uint64_t cur_offset; caddr_t data; uint32_t rem_bcount; int i; struct vm_page **ma; uint32_t ma_offset; *num_bios = nvme_get_num_segments(bp->bio_offset, bp->bio_bcount, alignment); child_bios = nvme_allocate_child_bios(*num_bios); if (child_bios == NULL) return (NULL); bp->bio_children = *num_bios; bp->bio_inbed = 0; cur_offset = bp->bio_offset; rem_bcount = bp->bio_bcount; data = bp->bio_data; ma_offset = bp->bio_ma_offset; ma = bp->bio_ma; for (i = 0; i < *num_bios; i++) { child = child_bios[i]; child->bio_parent = bp; child->bio_cmd = bp->bio_cmd; child->bio_offset = cur_offset; child->bio_bcount = min(rem_bcount, alignment - (cur_offset & (alignment - 1))); child->bio_flags = bp->bio_flags; if (bp->bio_flags & BIO_UNMAPPED) { child->bio_ma_offset = ma_offset; child->bio_ma = ma; child->bio_ma_n = nvme_get_num_segments(child->bio_ma_offset, child->bio_bcount, PAGE_SIZE); ma_offset = (ma_offset + child->bio_bcount) & PAGE_MASK; ma += child->bio_ma_n; if (ma_offset != 0) ma -= 1; } else { child->bio_data = data; data += child->bio_bcount; } cur_offset += child->bio_bcount; rem_bcount -= child->bio_bcount; } return (child_bios); } static int nvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp, uint32_t alignment) { struct bio *child; struct bio **child_bios; int err, i, num_bios; child_bios = nvme_construct_child_bios(bp, alignment, &num_bios); if (child_bios == NULL) return (ENOMEM); for (i = 0; i < num_bios; i++) { child = child_bios[i]; err = nvme_ns_bio_process(ns, child, nvme_bio_child_done); if (err != 0) { nvme_bio_child_inbed(bp, err); g_destroy_bio(child); } } free(child_bios, M_NVME); return (0); } int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp, nvme_cb_fn_t cb_fn) { struct nvme_dsm_range *dsm_range; uint32_t num_bios; int err; bp->bio_driver1 = cb_fn; if (ns->stripesize > 0 && (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { num_bios = nvme_get_num_segments(bp->bio_offset, bp->bio_bcount, ns->stripesize); if (num_bios > 1) return (nvme_ns_split_bio(ns, bp, ns->stripesize)); } switch (bp->bio_cmd) { case BIO_READ: err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp); break; case BIO_WRITE: err = nvme_ns_cmd_write_bio(ns, bp, nvme_ns_bio_done, bp); break; case BIO_FLUSH: err = nvme_ns_cmd_flush(ns, nvme_ns_bio_done, bp); break; case BIO_DELETE: dsm_range = malloc(sizeof(struct nvme_dsm_range), M_NVME, M_ZERO | M_WAITOK); if (!dsm_range) { err = ENOMEM; break; } dsm_range->length = htole32(bp->bio_bcount/nvme_ns_get_sector_size(ns)); dsm_range->starting_lba = htole64(bp->bio_offset/nvme_ns_get_sector_size(ns)); bp->bio_driver2 = dsm_range; err = nvme_ns_cmd_deallocate(ns, dsm_range, 1, nvme_ns_bio_done, bp); if (err != 0) free(dsm_range, M_NVME); break; default: err = EIO; break; } return (err); +} + +int +nvme_ns_ioctl_process(struct nvme_namespace *ns, u_long cmd, caddr_t arg, + int flag, struct thread *td) +{ + return (nvme_ns_ioctl(ns->cdev, cmd, arg, flag, td)); } int nvme_ns_construct(struct nvme_namespace *ns, uint32_t id, struct nvme_controller *ctrlr) { struct make_dev_args md_args; struct nvme_completion_poll_status status; int res; int unit; uint8_t flbas_fmt; uint8_t vwc_present; ns->ctrlr = ctrlr; ns->id = id; ns->stripesize = 0; /* * Older Intel devices advertise in vendor specific space an alignment * that improves performance. If present use for the stripe size. NVMe * 1.3 standardized this as NOIOB, and newer Intel drives use that. */ switch (pci_get_devid(ctrlr->dev)) { case 0x09538086: /* Intel DC PC3500 */ case 0x0a538086: /* Intel DC PC3520 */ case 0x0a548086: /* Intel DC PC4500 */ case 0x0a558086: /* Dell Intel P4600 */ if (ctrlr->cdata.vs[3] != 0) ns->stripesize = (1 << ctrlr->cdata.vs[3]) * ctrlr->min_page_size; break; default: break; } /* * Namespaces are reconstructed after a controller reset, so check * to make sure we only call mtx_init once on each mtx. * * TODO: Move this somewhere where it gets called at controller * construction time, which is not invoked as part of each * controller reset. */ if (!mtx_initialized(&ns->lock)) mtx_init(&ns->lock, "nvme ns lock", NULL, MTX_DEF); status.done = 0; nvme_ctrlr_cmd_identify_namespace(ctrlr, id, &ns->data, nvme_completion_poll_cb, &status); while (!atomic_load_acq_int(&status.done)) pause("nvme", 1); if (nvme_completion_is_error(&status.cpl)) { nvme_printf(ctrlr, "nvme_identify_namespace failed\n"); return (ENXIO); } /* Convert data to host endian */ nvme_namespace_data_swapbytes(&ns->data); /* * If the size of is zero, chances are this isn't a valid * namespace (eg one that's not been configured yet). The * standard says the entire id will be zeros, so this is a * cheap way to test for that. */ if (ns->data.nsze == 0) return (ENXIO); flbas_fmt = (ns->data.flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & NVME_NS_DATA_FLBAS_FORMAT_MASK; /* * Note: format is a 0-based value, so > is appropriate here, * not >=. */ if (flbas_fmt > ns->data.nlbaf) { printf("lba format %d exceeds number supported (%d)\n", flbas_fmt, ns->data.nlbaf + 1); return (ENXIO); } if (nvme_ctrlr_has_dataset_mgmt(&ctrlr->cdata)) ns->flags |= NVME_NS_DEALLOCATE_SUPPORTED; vwc_present = (ctrlr->cdata.vwc >> NVME_CTRLR_DATA_VWC_PRESENT_SHIFT) & NVME_CTRLR_DATA_VWC_PRESENT_MASK; if (vwc_present) ns->flags |= NVME_NS_FLUSH_SUPPORTED; /* * cdev may have already been created, if we are reconstructing the * namespace after a controller-level reset. */ if (ns->cdev != NULL) return (0); /* * Namespace IDs start at 1, so we need to subtract 1 to create a * correct unit number. */ unit = device_get_unit(ctrlr->dev) * NVME_MAX_NAMESPACES + ns->id - 1; make_dev_args_init(&md_args); md_args.mda_devsw = &nvme_ns_cdevsw; md_args.mda_unit = unit; md_args.mda_mode = 0600; md_args.mda_si_drv1 = ns; res = make_dev_s(&md_args, &ns->cdev, "nvme%dns%d", device_get_unit(ctrlr->dev), ns->id); if (res != 0) return (ENXIO); ns->cdev->si_flags |= SI_UNMAPPED; return (0); } void nvme_ns_destruct(struct nvme_namespace *ns) { if (ns->cdev != NULL) destroy_dev(ns->cdev); } Index: stable/12 =================================================================== --- stable/12 (revision 350945) +++ stable/12 (revision 350946) Property changes on: stable/12 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r350523-350524