diff --git a/sbin/nvmecontrol/Makefile b/sbin/nvmecontrol/Makefile index f534093b1332..81674475ba1f 100644 --- a/sbin/nvmecontrol/Makefile +++ b/sbin/nvmecontrol/Makefile @@ -1,34 +1,40 @@ .include PACKAGE=nvme-tools PROG= nvmecontrol SRCS+= comnd.c +SRCS+= connect.c SRCS+= devlist.c +SRCS+= disconnect.c +SRCS+= discover.c +SRCS+= fabrics.c SRCS+= firmware.c SRCS+= format.c SRCS+= identify.c SRCS+= identify_ext.c SRCS+= logpage.c SRCS+= nc_util.c SRCS+= ns.c SRCS+= nsid.c SRCS+= nvme_util.c SRCS+= nvmecontrol.c SRCS+= passthru.c SRCS+= perftest.c SRCS+= power.c +SRCS+= reconnect.c SRCS+= reset.c SRCS+= resv.c SRCS+= sanitize.c SRCS+= selftest.c +CFLAGS+= -I${SRCTOP}/lib/libnvmf MAN= nvmecontrol.8 LDFLAGS+= -rdynamic -LIBADD+= util +LIBADD+= nvmf util SUBDIR= modules HAS_TESTS= SUBDIR.${MK_TESTS}+= tests .PATH: ${SRCTOP}/sys/dev/nvme .include .include diff --git a/sbin/nvmecontrol/connect.c b/sbin/nvmecontrol/connect.c new file mode 100644 index 000000000000..afb78725a3c7 --- /dev/null +++ b/sbin/nvmecontrol/connect.c @@ -0,0 +1,283 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2024 Chelsio Communications, Inc. 
+ * Written by: John Baldwin + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "comnd.h" +#include "fabrics.h" + +/* + * Settings that are currently hardcoded but could be exposed to the + * user via additional command line options: + * + * - ADMIN queue entries + * - MaxR2T + */ + +static struct options { + const char *transport; + const char *address; + const char *cntlid; + const char *subnqn; + const char *hostnqn; + uint32_t kato; + uint16_t num_io_queues; + uint16_t queue_size; + bool data_digests; + bool flow_control; + bool header_digests; +} opt = { + .transport = "tcp", + .address = NULL, + .cntlid = "dynamic", + .subnqn = NULL, + .hostnqn = NULL, + .kato = NVMF_KATO_DEFAULT / 1000, + .num_io_queues = 1, + .queue_size = 0, + .data_digests = false, + .flow_control = false, + .header_digests = false, +}; + +static void +tcp_association_params(struct nvmf_association_params *params) +{ + params->tcp.pda = 0; + params->tcp.header_digests = opt.header_digests; + params->tcp.data_digests = opt.data_digests; + /* XXX */ + params->tcp.maxr2t = 1; +} + +static int +connect_nvm_controller(enum nvmf_trtype trtype, int adrfam, const char *address, + const char *port, uint16_t cntlid, const char *subnqn) +{ + struct nvme_controller_data cdata; + struct nvmf_association_params aparams; + struct nvmf_qpair *admin, **io; + int error; + + memset(&aparams, 0, sizeof(aparams)); + aparams.sq_flow_control = opt.flow_control; + switch (trtype) { + case NVMF_TRTYPE_TCP: + tcp_association_params(&aparams); + break; + default: + warnx("Unsupported transport %s", nvmf_transport_type(trtype)); + return (EX_UNAVAILABLE); + } + + io = calloc(opt.num_io_queues, sizeof(*io)); + error = connect_nvm_queues(&aparams, trtype, adrfam, address, port, + cntlid, subnqn, opt.hostnqn, opt.kato, &admin, io, + opt.num_io_queues, opt.queue_size, &cdata); + if (error != 0) + return (error); + + error = nvmf_handoff_host(admin, opt.num_io_queues, io, &cdata); + 
if (error != 0) { + warnc(error, "Failed to handoff queues to kernel"); + return (EX_IOERR); + } + free(io); + return (0); +} + +static void +connect_discovery_entry(struct nvme_discovery_log_entry *entry) +{ + int adrfam; + + switch (entry->trtype) { + case NVMF_TRTYPE_TCP: + switch (entry->adrfam) { + case NVMF_ADRFAM_IPV4: + adrfam = AF_INET; + break; + case NVMF_ADRFAM_IPV6: + adrfam = AF_INET6; + break; + default: + warnx("Skipping unsupported address family for %s", + entry->subnqn); + return; + } + switch (entry->tsas.tcp.sectype) { + case NVME_TCP_SECURITY_NONE: + break; + default: + warnx("Skipping unsupported TCP security type for %s", + entry->subnqn); + return; + } + break; + default: + warnx("Skipping unsupported transport %s for %s", + nvmf_transport_type(entry->trtype), entry->subnqn); + return; + } + + /* + * XXX: Track portids and avoid duplicate connections for a + * given (subnqn,portid)? + */ + + /* XXX: Should this make use of entry->aqsz in some way? */ + connect_nvm_controller(entry->trtype, adrfam, entry->traddr, + entry->trsvcid, entry->cntlid, entry->subnqn); +} + +static void +connect_discovery_log_page(struct nvmf_qpair *qp) +{ + struct nvme_discovery_log *log; + int error; + + error = nvmf_host_fetch_discovery_log_page(qp, &log); + if (error != 0) + errc(EX_IOERR, error, "Failed to fetch discovery log page"); + + for (u_int i = 0; i < log->numrec; i++) + connect_discovery_entry(&log->entries[i]); + free(log); +} + +static void +discover_controllers(enum nvmf_trtype trtype, const char *address, + const char *port) +{ + struct nvmf_qpair *qp; + + qp = connect_discovery_adminq(trtype, address, port, opt.hostnqn); + + connect_discovery_log_page(qp); + + nvmf_free_qpair(qp); +} + +static void +connect_fn(const struct cmd *f, int argc, char *argv[]) +{ + enum nvmf_trtype trtype; + const char *address, *port; + char *tofree; + u_long cntlid; + int error; + + if (arg_parse(argc, argv, f)) + return; + + if (opt.num_io_queues <= 0) + 
errx(EX_USAGE, "Invalid number of I/O queues"); + + if (strcasecmp(opt.transport, "tcp") == 0) { + trtype = NVMF_TRTYPE_TCP; + } else + errx(EX_USAGE, "Unsupported or invalid transport"); + + nvmf_parse_address(opt.address, &address, &port, &tofree); + if (port == NULL) + errx(EX_USAGE, "Explicit port required"); + + cntlid = nvmf_parse_cntlid(opt.cntlid); + + error = connect_nvm_controller(trtype, AF_UNSPEC, address, port, cntlid, + opt.subnqn); + if (error != 0) + exit(error); + + free(tofree); +} + +static void +connect_all_fn(const struct cmd *f, int argc, char *argv[]) +{ + enum nvmf_trtype trtype; + const char *address, *port; + char *tofree; + + if (arg_parse(argc, argv, f)) + return; + + if (opt.num_io_queues <= 0) + errx(EX_USAGE, "Invalid number of I/O queues"); + + if (strcasecmp(opt.transport, "tcp") == 0) { + trtype = NVMF_TRTYPE_TCP; + } else + errx(EX_USAGE, "Unsupported or invalid transport"); + + nvmf_parse_address(opt.address, &address, &port, &tofree); + discover_controllers(trtype, address, port); + + free(tofree); +} + +static const struct opts connect_opts[] = { +#define OPT(l, s, t, opt, addr, desc) { l, s, t, &opt.addr, desc } + OPT("transport", 't', arg_string, opt, transport, + "Transport type"), + OPT("cntlid", 'c', arg_string, opt, cntlid, + "Controller ID"), + OPT("nr-io-queues", 'i', arg_uint16, opt, num_io_queues, + "Number of I/O queues"), + OPT("queue-size", 'Q', arg_uint16, opt, queue_size, + "Number of entries in each I/O queue"), + OPT("keep-alive-tmo", 'k', arg_uint32, opt, kato, + "Keep Alive timeout (in seconds)"), + OPT("hostnqn", 'q', arg_string, opt, hostnqn, + "Host NQN"), + OPT("flow_control", 'F', arg_none, opt, flow_control, + "Request SQ flow control"), + OPT("hdr_digests", 'g', arg_none, opt, header_digests, + "Enable TCP PDU header digests"), + OPT("data_digests", 'G', arg_none, opt, data_digests, + "Enable TCP PDU data digests"), + { NULL, 0, arg_none, NULL, NULL } +}; +#undef OPT + +static const struct args 
connect_args[] = { + { arg_string, &opt.address, "address" }, + { arg_string, &opt.subnqn, "SubNQN" }, + { arg_none, NULL, NULL }, +}; + +static const struct args connect_all_args[] = { + { arg_string, &opt.address, "address" }, + { arg_none, NULL, NULL }, +}; + +static struct cmd connect_cmd = { + .name = "connect", + .fn = connect_fn, + .descr = "Connect to a fabrics controller", + .ctx_size = sizeof(opt), + .opts = connect_opts, + .args = connect_args, +}; + +static struct cmd connect_all_cmd = { + .name = "connect-all", + .fn = connect_all_fn, + .descr = "Discover and connect to fabrics controllers", + .ctx_size = sizeof(opt), + .opts = connect_opts, + .args = connect_all_args, +}; + +CMD_COMMAND(connect_cmd); +CMD_COMMAND(connect_all_cmd); diff --git a/sbin/nvmecontrol/disconnect.c b/sbin/nvmecontrol/disconnect.c new file mode 100644 index 000000000000..b1b6af6271e8 --- /dev/null +++ b/sbin/nvmecontrol/disconnect.c @@ -0,0 +1,82 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2024 Chelsio Communications, Inc. 
+ * Written by: John Baldwin + */ + +#include +#include +#include +#include +#include + +#include "nvmecontrol.h" + +static struct options { + const char *dev; +} opt = { + .dev = NULL +}; + +static const struct args args[] = { + { arg_string, &opt.dev, "controller-id|namespace-id|SubNQN" }, + { arg_none, NULL, NULL }, +}; + +static void +disconnect(const struct cmd *f, int argc, char *argv[]) +{ + int error, fd; + char *path; + + if (arg_parse(argc, argv, f)) + return; + if (nvmf_nqn_valid(opt.dev)) { + error = nvmf_disconnect_host(opt.dev); + if (error != 0) + errc(EX_IOERR, error, "failed to disconnect from %s", + opt.dev); + } else { + open_dev(opt.dev, &fd, 1, 1); + get_nsid(fd, &path, NULL); + close(fd); + + error = nvmf_disconnect_host(path); + if (error != 0) + errc(EX_IOERR, error, "failed to disconnect from %s", + path); + } + + exit(0); +} + +static void +disconnect_all(const struct cmd *f __unused, int argc __unused, + char *argv[] __unused) +{ + int error; + + error = nvmf_disconnect_all(); + if (error != 0) + errc(EX_IOERR, error, + "failed to disconnect from remote controllers"); + + exit(0); +} + +static struct cmd disconnect_cmd = { + .name = "disconnect", + .fn = disconnect, + .descr = "Disconnect from a fabrics controller", + .args = args, +}; + +static struct cmd disconnect_all_cmd = { + .name = "disconnect-all", + .fn = disconnect_all, + .descr = "Disconnect from all fabrics controllers", +}; + +CMD_COMMAND(disconnect_cmd); +CMD_COMMAND(disconnect_all_cmd); diff --git a/sbin/nvmecontrol/discover.c b/sbin/nvmecontrol/discover.c new file mode 100644 index 000000000000..c782ebeb7452 --- /dev/null +++ b/sbin/nvmecontrol/discover.c @@ -0,0 +1,300 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2024 Chelsio Communications, Inc. 
+ * Written by: John Baldwin + */ + +#include +#include +#include +#include +#include +#include + +#include "comnd.h" +#include "fabrics.h" +#include "nvmecontrol_ext.h" + +static struct options { + const char *transport; + const char *address; + const char *hostnqn; + bool verbose; +} opt = { + .transport = "tcp", + .address = NULL, + .hostnqn = NULL, + .verbose = false, +}; + +static void +identify_controller(struct nvmf_qpair *qp) +{ + struct nvme_controller_data cdata; + int error; + + error = nvmf_host_identify_controller(qp, &cdata); + if (error != 0) + errc(EX_IOERR, error, "Failed to fetch controller data"); + nvme_print_controller(&cdata); +} + +static const char * +nvmf_address_family(uint8_t adrfam) +{ + static char buf[8]; + + switch (adrfam) { + case NVMF_ADRFAM_IPV4: + return ("AF_INET"); + case NVMF_ADRFAM_IPV6: + return ("AF_INET6"); + case NVMF_ADRFAM_IB: + return ("InfiniBand"); + case NVMF_ADRFAM_FC: + return ("Fibre Channel"); + case NVMF_ADRFAM_INTRA_HOST: + return ("Intra-host"); + default: + snprintf(buf, sizeof(buf), "0x%02x\n", adrfam); + return (buf); + } +} + +static const char * +nvmf_subsystem_type(uint8_t subtype) +{ + static char buf[8]; + + switch (subtype) { + case NVMF_SUBTYPE_DISCOVERY: + return ("Discovery"); + case NVMF_SUBTYPE_NVME: + return ("NVMe"); + default: + snprintf(buf, sizeof(buf), "0x%02x\n", subtype); + return (buf); + } +} + +static const char * +nvmf_secure_channel(uint8_t treq) +{ + switch (treq & 0x03) { + case NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED: + return ("Not specified"); + case NVMF_TREQ_SECURE_CHANNEL_REQUIRED: + return ("Required"); + case NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED: + return ("Not required"); + default: + return ("0x03"); + } +} + +static const char * +nvmf_controller_id(uint16_t cntlid) +{ + static char buf[8]; + + switch (cntlid) { + case NVMF_CNTLID_DYNAMIC: + return ("Dynamic"); + case NVMF_CNTLID_STATIC_ANY: + return ("Static"); + default: + snprintf(buf, sizeof(buf), "%u", cntlid); + 
return (buf); + } +} + +static const char * +nvmf_rdma_service_type(uint8_t qptype) +{ + static char buf[8]; + + switch (qptype) { + case NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED: + return ("Reliable connected"); + case NVMF_RDMA_QPTYPE_RELIABLE_DATAGRAM: + return ("Reliable datagram"); + default: + snprintf(buf, sizeof(buf), "0x%02x\n", qptype); + return (buf); + } +} + +static const char * +nvmf_rdma_provider_type(uint8_t prtype) +{ + static char buf[8]; + + switch (prtype) { + case NVMF_RDMA_PRTYPE_NONE: + return ("None"); + case NVMF_RDMA_PRTYPE_IB: + return ("InfiniBand"); + case NVMF_RDMA_PRTYPE_ROCE: + return ("RoCE (v1)"); + case NVMF_RDMA_PRTYPE_ROCE2: + return ("RoCE (v2)"); + case NVMF_RDMA_PRTYPE_IWARP: + return ("iWARP"); + default: + snprintf(buf, sizeof(buf), "0x%02x\n", prtype); + return (buf); + } +} + +static const char * +nvmf_rdma_cms(uint8_t cms) +{ + static char buf[8]; + + switch (cms) { + case NVMF_RDMA_CMS_RDMA_CM: + return ("RDMA_IP_CM"); + default: + snprintf(buf, sizeof(buf), "0x%02x\n", cms); + return (buf); + } +} + +static const char * +nvmf_tcp_security_type(uint8_t sectype) +{ + static char buf[8]; + + switch (sectype) { + case NVME_TCP_SECURITY_NONE: + return ("None"); + case NVME_TCP_SECURITY_TLS_1_2: + return ("TLS 1.2"); + case NVME_TCP_SECURITY_TLS_1_3: + return ("TLS 1.3"); + default: + snprintf(buf, sizeof(buf), "0x%02x\n", sectype); + return (buf); + } +} + +static void +print_discovery_entry(u_int i, struct nvme_discovery_log_entry *entry) +{ + printf("Entry %02d\n", i + 1); + printf("========\n"); + printf(" Transport type: %s\n", + nvmf_transport_type(entry->trtype)); + printf(" Address family: %s\n", + nvmf_address_family(entry->adrfam)); + printf(" Subsystem type: %s\n", + nvmf_subsystem_type(entry->subtype)); + printf(" SQ flow control: %s\n", + (entry->treq & (1 << 2)) == 0 ? 
"required" : "optional"); + printf(" Secure Channel: %s\n", nvmf_secure_channel(entry->treq)); + printf(" Port ID: %u\n", entry->portid); + printf(" Controller ID: %s\n", + nvmf_controller_id(entry->cntlid)); + printf(" Max Admin SQ Size: %u\n", entry->aqsz); + printf(" Sub NQN: %s\n", entry->subnqn); + printf(" Transport address: %s\n", entry->traddr); + printf(" Service identifier: %s\n", entry->trsvcid); + switch (entry->trtype) { + case NVMF_TRTYPE_RDMA: + printf(" RDMA Service Type: %s\n", + nvmf_rdma_service_type(entry->tsas.rdma.rdma_qptype)); + printf(" RDMA Provider Type: %s\n", + nvmf_rdma_provider_type(entry->tsas.rdma.rdma_prtype)); + printf(" RDMA CMS: %s\n", + nvmf_rdma_cms(entry->tsas.rdma.rdma_cms)); + printf(" Partition key: %u\n", + entry->tsas.rdma.rdma_pkey); + break; + case NVMF_TRTYPE_TCP: + printf(" Security Type: %s\n", + nvmf_tcp_security_type(entry->tsas.tcp.sectype)); + break; + } +} + +static void +dump_discovery_log_page(struct nvmf_qpair *qp) +{ + struct nvme_discovery_log *log; + int error; + + error = nvmf_host_fetch_discovery_log_page(qp, &log); + if (error != 0) + errc(EX_IOERR, error, "Failed to fetch discovery log page"); + + printf("Discovery\n"); + printf("=========\n"); + if (log->numrec == 0) { + printf("No entries found\n"); + } else { + for (u_int i = 0; i < log->numrec; i++) + print_discovery_entry(i, &log->entries[i]); + } + free(log); +} + +static void +discover(const struct cmd *f, int argc, char *argv[]) +{ + enum nvmf_trtype trtype; + struct nvmf_qpair *qp; + const char *address, *port; + char *tofree; + + if (arg_parse(argc, argv, f)) + return; + + if (strcasecmp(opt.transport, "tcp") == 0) { + trtype = NVMF_TRTYPE_TCP; + } else + errx(EX_USAGE, "Unsupported or invalid transport"); + + nvmf_parse_address(opt.address, &address, &port, &tofree); + qp = connect_discovery_adminq(trtype, address, port, opt.hostnqn); + free(tofree); + + /* Use Identify to fetch controller data */ + if (opt.verbose) { + 
identify_controller(qp); + printf("\n"); + } + + /* Fetch Log pages */ + dump_discovery_log_page(qp); + + nvmf_free_qpair(qp); +} + +static const struct opts discover_opts[] = { +#define OPT(l, s, t, opt, addr, desc) { l, s, t, &opt.addr, desc } + OPT("transport", 't', arg_string, opt, transport, + "Transport type"), + OPT("hostnqn", 'q', arg_string, opt, hostnqn, + "Host NQN"), + OPT("verbose", 'v', arg_none, opt, verbose, + "Display the discovery controller's controller data"), + { NULL, 0, arg_none, NULL, NULL } +}; +#undef OPT + +static const struct args discover_args[] = { + { arg_string, &opt.address, "address" }, + { arg_none, NULL, NULL }, +}; + +static struct cmd discover_cmd = { + .name = "discover", + .fn = discover, + .descr = "List discovery log pages from a fabrics controller", + .ctx_size = sizeof(opt), + .opts = discover_opts, + .args = discover_args, +}; + +CMD_COMMAND(discover_cmd); diff --git a/sbin/nvmecontrol/fabrics.c b/sbin/nvmecontrol/fabrics.c new file mode 100644 index 000000000000..6470e4062b39 --- /dev/null +++ b/sbin/nvmecontrol/fabrics.c @@ -0,0 +1,520 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2024 Chelsio Communications, Inc. + * Written by: John Baldwin + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fabrics.h" + +/* + * Subroutines shared by several Fabrics commands. 
+ */ +static char nqn[NVMF_NQN_MAX_LEN]; +static uint8_t hostid[16]; +static bool hostid_initted = false; + +static bool +init_hostid(void) +{ + int error; + + if (hostid_initted) + return (true); + + error = nvmf_hostid_from_hostuuid(hostid); + if (error != 0) { + warnc(error, "Failed to generate hostid"); + return (false); + } + error = nvmf_nqn_from_hostuuid(nqn); + if (error != 0) { + warnc(error, "Failed to generate host NQN"); + return (false); + } + + hostid_initted = true; + return (true); +} + +void +nvmf_parse_address(const char *in_address, const char **address, + const char **port, char **tofree) +{ + char *cp; + + /* + * Accepts the following address formats: + * + * [IPv6 address]:port + * IPv4 address:port + * hostname:port + * [IPv6 address] + * IPv6 address + * IPv4 address + * hostname + */ + if (in_address[0] == '[') { + /* IPv6 address in square brackets. */ + cp = strchr(in_address + 1, ']'); + if (cp == NULL || cp == in_address + 1) + errx(EX_USAGE, "Invalid address %s", in_address); + *tofree = strndup(in_address + 1, cp - (in_address + 1)); + *address = *tofree; + + /* Skip over ']' */ + cp++; + switch (*cp) { + case '\0': + *port = NULL; + return; + case ':': + if (cp[1] != '\0') { + *port = cp + 1; + return; + } + /* FALLTHROUGH */ + default: + errx(EX_USAGE, "Invalid address %s", in_address); + } + } + + /* Look for the first colon. */ + cp = strchr(in_address, ':'); + if (cp == NULL) { + *address = in_address; + *port = NULL; + *tofree = NULL; + return; + } + + /* If there is another colon, assume this is an IPv6 address. */ + if (strchr(cp + 1, ':') != NULL) { + *address = in_address; + *port = NULL; + *tofree = NULL; + return; + } + + /* Both strings on either side of the colon must be non-empty. 
*/ + if (cp == in_address || cp[1] == '\0') + errx(EX_USAGE, "Invalid address %s", in_address); + + *tofree = strndup(in_address, cp - in_address); + *address = *tofree; + + /* Skip over ':' */ + *port = cp + 1; +} + +uint16_t +nvmf_parse_cntlid(const char *cntlid) +{ + u_long value; + + if (strcasecmp(cntlid, "dynamic") == 0) + return (NVMF_CNTLID_DYNAMIC); + else if (strcasecmp(cntlid, "static") == 0) + return (NVMF_CNTLID_STATIC_ANY); + else { + value = strtoul(cntlid, NULL, 0); + + if (value > NVMF_CNTLID_STATIC_MAX) + errx(EX_USAGE, "Invalid controller ID"); + + return (value); + } +} + +bool +tcp_qpair_params(struct nvmf_qpair_params *params, int adrfam, + const char *address, const char *port) +{ + struct addrinfo hints, *ai, *list; + int error, s; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = adrfam; + hints.ai_protocol = IPPROTO_TCP; + error = getaddrinfo(address, port, &hints, &list); + if (error != 0) { + warnx("%s", gai_strerror(error)); + return (false); + } + + for (ai = list; ai != NULL; ai = ai->ai_next) { + s = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); + if (s == -1) + continue; + + if (connect(s, ai->ai_addr, ai->ai_addrlen) != 0) { + close(s); + continue; + } + + params->tcp.fd = s; + freeaddrinfo(list); + return (true); + } + warn("Failed to connect to controller at %s:%s", address, port); + return (false); +} + +static void +tcp_discovery_association_params(struct nvmf_association_params *params) +{ + params->tcp.pda = 0; + params->tcp.header_digests = false; + params->tcp.data_digests = false; + params->tcp.maxr2t = 1; +} + +struct nvmf_qpair * +connect_discovery_adminq(enum nvmf_trtype trtype, const char *address, + const char *port, const char *hostnqn) +{ + struct nvmf_association_params aparams; + struct nvmf_qpair_params qparams; + struct nvmf_association *na; + struct nvmf_qpair *qp; + uint64_t cap, cc, csts; + int error, timo; + + memset(&aparams, 0, sizeof(aparams)); + aparams.sq_flow_control = false; + 
switch (trtype) { + case NVMF_TRTYPE_TCP: + /* 7.4.9.3 Default port for discovery */ + if (port == NULL) + port = "8009"; + tcp_discovery_association_params(&aparams); + break; + default: + errx(EX_UNAVAILABLE, "Unsupported transport %s", + nvmf_transport_type(trtype)); + } + + if (!init_hostid()) + exit(EX_IOERR); + if (hostnqn != NULL) { + if (!nvmf_nqn_valid(hostnqn)) + errx(EX_USAGE, "Invalid HostNQN %s", hostnqn); + } else + hostnqn = nqn; + + na = nvmf_allocate_association(trtype, false, &aparams); + if (na == NULL) + err(EX_IOERR, "Failed to create discovery association"); + memset(&qparams, 0, sizeof(qparams)); + qparams.admin = true; + if (!tcp_qpair_params(&qparams, AF_UNSPEC, address, port)) + exit(EX_NOHOST); + qp = nvmf_connect(na, &qparams, 0, NVME_MIN_ADMIN_ENTRIES, hostid, + NVMF_CNTLID_DYNAMIC, NVMF_DISCOVERY_NQN, hostnqn, 0); + if (qp == NULL) + errx(EX_IOERR, "Failed to connect to discovery controller: %s", + nvmf_association_error(na)); + nvmf_free_association(na); + + /* Fetch Controller Capabilities Property */ + error = nvmf_read_property(qp, NVMF_PROP_CAP, 8, &cap); + if (error != 0) + errc(EX_IOERR, error, "Failed to fetch CAP"); + + /* Set Controller Configuration Property (CC.EN=1) */ + error = nvmf_read_property(qp, NVMF_PROP_CC, 4, &cc); + if (error != 0) + errc(EX_IOERR, error, "Failed to fetch CC"); + + /* Clear known fields preserving any reserved fields. */ + cc &= ~(NVMEM(NVME_CC_REG_SHN) | NVMEM(NVME_CC_REG_AMS) | + NVMEM(NVME_CC_REG_MPS) | NVMEM(NVME_CC_REG_CSS)); + + /* Leave AMS, MPS, and CSS as 0. 
*/ + + cc |= NVMEF(NVME_CC_REG_EN, 1); + + error = nvmf_write_property(qp, NVMF_PROP_CC, 4, cc); + if (error != 0) + errc(EX_IOERR, error, "Failed to set CC"); + + /* Wait for CSTS.RDY in Controller Status */ + timo = NVME_CAP_LO_TO(cap); + for (;;) { + error = nvmf_read_property(qp, NVMF_PROP_CSTS, 4, &csts); + if (error != 0) + errc(EX_IOERR, error, "Failed to fetch CSTS"); + + if (NVMEV(NVME_CSTS_REG_RDY, csts) != 0) + break; + + if (timo == 0) + errx(EX_IOERR, "Controller failed to become ready"); + timo--; + usleep(500 * 1000); + } + + return (qp); +} + +/* + * XXX: Should this accept the admin queue size as a parameter rather + * than always using NVMF_MIN_ADMIN_MAX_SQ_SIZE? + */ +static int +connect_nvm_adminq(struct nvmf_association *na, + const struct nvmf_qpair_params *params, struct nvmf_qpair **qpp, + uint16_t cntlid, const char *subnqn, const char *hostnqn, uint32_t kato, + uint16_t *mqes) +{ + struct nvmf_qpair *qp; + uint64_t cap, cc, csts; + u_int mps, mpsmin, mpsmax; + int error, timo; + + qp = nvmf_connect(na, params, 0, NVMF_MIN_ADMIN_MAX_SQ_SIZE, hostid, + cntlid, subnqn, hostnqn, kato); + if (qp == NULL) { + warnx("Failed to connect to NVM controller %s: %s", subnqn, + nvmf_association_error(na)); + return (EX_IOERR); + } + + /* Fetch Controller Capabilities Property */ + error = nvmf_read_property(qp, NVMF_PROP_CAP, 8, &cap); + if (error != 0) { + warnc(error, "Failed to fetch CAP"); + nvmf_free_qpair(qp); + return (EX_IOERR); + } + + /* Require the NVM command set. */ + if (NVME_CAP_HI_CSS_NVM(cap >> 32) == 0) { + warnx("Controller %s does not support the NVM command set", + subnqn); + nvmf_free_qpair(qp); + return (EX_UNAVAILABLE); + } + + *mqes = NVME_CAP_LO_MQES(cap); + + /* Prefer native host page size if it fits. 
*/ + mpsmin = NVMEV(NVME_CAP_HI_REG_MPSMIN, cap >> 32); + mpsmax = NVMEV(NVME_CAP_HI_REG_MPSMAX, cap >> 32); + mps = ffs(getpagesize()) - 1; + if (mps < mpsmin + NVME_MPS_SHIFT) + mps = mpsmin; + else if (mps > mpsmax + NVME_MPS_SHIFT) + mps = mpsmax; + else + mps -= NVME_MPS_SHIFT; + + /* Configure controller. */ + error = nvmf_read_property(qp, NVMF_PROP_CC, 4, &cc); + if (error != 0) { + warnc(error, "Failed to fetch CC"); + nvmf_free_qpair(qp); + return (EX_IOERR); + } + + /* Clear known fields preserving any reserved fields. */ + cc &= ~(NVMEM(NVME_CC_REG_IOCQES) | NVMEM(NVME_CC_REG_IOSQES) | + NVMEM(NVME_CC_REG_SHN) | NVMEM(NVME_CC_REG_AMS) | + NVMEM(NVME_CC_REG_MPS) | NVMEM(NVME_CC_REG_CSS)); + + cc |= NVMEF(NVME_CC_REG_IOCQES, 4); /* CQE entry size == 16 */ + cc |= NVMEF(NVME_CC_REG_IOSQES, 6); /* SEQ entry size == 64 */ + cc |= NVMEF(NVME_CC_REG_AMS, 0); /* AMS 0 (Round-robin) */ + cc |= NVMEF(NVME_CC_REG_MPS, mps); + cc |= NVMEF(NVME_CC_REG_CSS, 0); /* NVM command set */ + cc |= NVMEF(NVME_CC_REG_EN, 1); /* EN = 1 */ + + error = nvmf_write_property(qp, NVMF_PROP_CC, 4, cc); + if (error != 0) { + warnc(error, "Failed to set CC"); + nvmf_free_qpair(qp); + return (EX_IOERR); + } + + /* Wait for CSTS.RDY in Controller Status */ + timo = NVME_CAP_LO_TO(cap); + for (;;) { + error = nvmf_read_property(qp, NVMF_PROP_CSTS, 4, &csts); + if (error != 0) { + warnc(error, "Failed to fetch CSTS"); + nvmf_free_qpair(qp); + return (EX_IOERR); + } + + if (NVMEV(NVME_CSTS_REG_RDY, csts) != 0) + break; + + if (timo == 0) { + warnx("Controller failed to become ready"); + nvmf_free_qpair(qp); + return (EX_IOERR); + } + timo--; + usleep(500 * 1000); + } + + *qpp = qp; + return (0); +} + +static void +shutdown_controller(struct nvmf_qpair *qp) +{ + uint64_t cc; + int error; + + error = nvmf_read_property(qp, NVMF_PROP_CC, 4, &cc); + if (error != 0) { + warnc(error, "Failed to fetch CC"); + goto out; + } + + cc |= NVMEF(NVME_CC_REG_SHN, NVME_SHN_NORMAL); + + error = 
nvmf_write_property(qp, NVMF_PROP_CC, 4, cc); + if (error != 0) { + warnc(error, "Failed to set CC to trigger shutdown"); + goto out; + } + +out: + nvmf_free_qpair(qp); +} + +/* Returns a value from */ +int +connect_nvm_queues(const struct nvmf_association_params *aparams, + enum nvmf_trtype trtype, int adrfam, const char *address, + const char *port, uint16_t cntlid, const char *subnqn, const char *hostnqn, + uint32_t kato, struct nvmf_qpair **admin, struct nvmf_qpair **io, + u_int num_io_queues, u_int queue_size, struct nvme_controller_data *cdata) +{ + struct nvmf_qpair_params qparams; + struct nvmf_association *na; + u_int queues; + int error; + uint16_t mqes; + + switch (trtype) { + case NVMF_TRTYPE_TCP: + break; + default: + warnx("Unsupported transport %s", nvmf_transport_type(trtype)); + return (EX_UNAVAILABLE); + } + + if (!init_hostid()) + return (EX_IOERR); + if (hostnqn != NULL) { + if (!nvmf_nqn_valid(hostnqn)) { + warnx("Invalid HostNQN %s", hostnqn); + return (EX_USAGE); + } + } else + hostnqn = nqn; + + /* Association. */ + na = nvmf_allocate_association(trtype, false, aparams); + if (na == NULL) { + warn("Failed to create association for %s", subnqn); + return (EX_IOERR); + } + + /* Admin queue. */ + memset(&qparams, 0, sizeof(qparams)); + qparams.admin = true; + if (!tcp_qpair_params(&qparams, adrfam, address, port)) { + nvmf_free_association(na); + return (EX_NOHOST); + } + error = connect_nvm_adminq(na, &qparams, admin, cntlid, subnqn, hostnqn, + kato, &mqes); + if (error != 0) { + nvmf_free_association(na); + return (error); + } + + /* Validate I/O queue size. */ + if (queue_size == 0) + queue_size = mqes + 1; + else if (queue_size > mqes + 1) { + shutdown_controller(*admin); + nvmf_free_association(na); + warn("I/O queue size exceeds controller maximum (%u)", + mqes + 1); + return (EX_USAGE); + } + + /* Fetch controller data. 
*/ + error = nvmf_host_identify_controller(*admin, cdata); + if (error != 0) { + shutdown_controller(*admin); + nvmf_free_association(na); + warnc(error, "Failed to fetch controller data for %s", subnqn); + return (EX_IOERR); + } + + nvmf_update_assocation(na, cdata); + + error = nvmf_host_request_queues(*admin, num_io_queues, &queues); + if (error != 0) { + shutdown_controller(*admin); + nvmf_free_association(na); + warnc(error, "Failed to request I/O queues"); + return (EX_IOERR); + } + if (queues < num_io_queues) { + shutdown_controller(*admin); + nvmf_free_association(na); + warnx("Controller enabled fewer I/O queues (%u) than requested (%u)", + queues, num_io_queues); + return (EX_PROTOCOL); + } + + /* I/O queues. */ + memset(io, 0, sizeof(io) * num_io_queues); + for (u_int i = 0; i < num_io_queues; i++) { + memset(&qparams, 0, sizeof(qparams)); + qparams.admin = false; + if (!tcp_qpair_params(&qparams, adrfam, address, port)) { + error = EX_NOHOST; + goto out; + } + io[i] = nvmf_connect(na, &qparams, i + 1, queue_size, hostid, + nvmf_cntlid(*admin), subnqn, hostnqn, 0); + if (io[i] == NULL) { + warnx("Failed to create I/O queue: %s", + nvmf_association_error(na)); + error = EX_IOERR; + goto out; + } + } + nvmf_free_association(na); + return (0); + +out: + for (u_int i = 0; i < num_io_queues; i++) { + if (io[i] == NULL) + break; + nvmf_free_qpair(io[i]); + } + shutdown_controller(*admin); + nvmf_free_association(na); + return (error); +} diff --git a/sbin/nvmecontrol/fabrics.h b/sbin/nvmecontrol/fabrics.h new file mode 100644 index 000000000000..9d6ee24b88fb --- /dev/null +++ b/sbin/nvmecontrol/fabrics.h @@ -0,0 +1,41 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2024 Chelsio Communications, Inc. + * Written by: John Baldwin + */ + +#ifndef __FABRICS_H__ +#define __FABRICS_H__ + +/* + * Splits 'in_address' into separate 'address' and 'port' strings. 
If + * a separate buffer for the address was allocated, 'tofree' is set to + * the allocated buffer, otherwise 'tofree' is set to NULL. + */ +void nvmf_parse_address(const char *in_address, const char **address, + const char **port, char **tofree); + +uint16_t nvmf_parse_cntlid(const char *cntlid); + +/* Returns true if able to open a connection. */ +bool tcp_qpair_params(struct nvmf_qpair_params *params, int adrfam, + const char *address, const char *port); + +/* Connect to a discovery controller and return the Admin qpair. */ +struct nvmf_qpair *connect_discovery_adminq(enum nvmf_trtype trtype, + const char *address, const char *port, const char *hostnqn); + +/* + * Connect to an NVM controller establishing an Admin qpair and one or + * more I/O qpairs. The controller's controller data is returned in + * *cdata on success. Returns a non-zero value from on + * failure. + */ +int connect_nvm_queues(const struct nvmf_association_params *aparams, + enum nvmf_trtype trtype, int adrfam, const char *address, + const char *port, uint16_t cntlid, const char *subnqn, const char *hostnqn, + uint32_t kato, struct nvmf_qpair **admin, struct nvmf_qpair **io, + u_int num_io_queues, u_int queue_size, struct nvme_controller_data *cdata); + +#endif /* !__FABRICS_H__ */ diff --git a/sbin/nvmecontrol/nvmecontrol.8 b/sbin/nvmecontrol/nvmecontrol.8 index 1310184ac309..6f7b45aac607 100644 --- a/sbin/nvmecontrol/nvmecontrol.8 +++ b/sbin/nvmecontrol/nvmecontrol.8 @@ -1,856 +1,1017 @@ .\" .\" Copyright (c) 2020 Warner Losh .\" Copyright (c) 2018-2019 Alexander Motin .\" Copyright (c) 2012 Intel Corporation .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions, and the following disclaimer, .\" without modification. .\" 2. 
Redistributions in binary form must reproduce at minimum a disclaimer .\" substantially similar to the "NO WARRANTY" disclaimer below .\" ("Disclaimer") and any redistribution must be conditioned upon .\" including a substantially similar Disclaimer requirement for further .\" binary redistribution. .\" .\" NO WARRANTY .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS .\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT .\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR .\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT .\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, .\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING .\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGES. .\" .\" nvmecontrol man page. 
.\" .\" Author: Jim Harris .\" .Dd May 3, 2024 .Dt NVMECONTROL 8 .Os .Sh NAME .Nm nvmecontrol .Nd NVM Express control utility .Sh SYNOPSIS .Nm .Ic devlist .Op Fl h .Nm .Ic identify .Op Fl v .Op Fl x .Op Fl n Ar nsid .Aq Ar device-id | Ar namespace-id .Nm .Ic perftest .Aq Fl n Ar num_threads .Aq Fl o Ar read|write .Op Fl p .Aq Fl s Ar size_in_bytes .Aq Fl t Ar time_in_sec .Aq Ar namespace-id .Nm .Ic reset .Aq Ar device-id .Nm .Ic logpage .Aq Fl p Ar page_id .Op Fl x .Op Fl v Ar vendor-string .Op Fl b .Op Fl f Ar LSP .Op Fl i Ar LSI .Op Fl r .Aq Ar device-id | Ar namespace-id .Nm .Ic ns active .Aq Ar device-id .Nm .Ic ns allocated .Aq Ar device-id .Nm .Ic ns attach .Aq Fl n Ar nsid .Aq Fl c Ar cntid .Aq Ar device-id .Nm .Ic ns attached .Aq Fl n Ar nsid .Aq Ar device-id .Nm .Ic ns controllers .Aq Ar device-id .Nm .Ic ns create .Aq Fl s Ar nsze .Op Fl c Ar ncap .Op Fl f Ar lbaf .Op Fl m Ar mset .Op Fl n Ar nmic .Op Fl p Ar pi .Op Fl l Ar pil .Op Fl L Ar flbas .Op Fl d Ar dps .Aq Ar device-id .Nm .Ic ns delete .Aq Fl n Ar nsid .Aq Ar device-id .Nm .Ic ns detach .Aq Fl n Ar nsid .Aq Fl c Ar cntid .Aq Ar device-id .Nm .Ic ns identify .Op Fl v .Op Fl x .Aq Fl n Ar nsid .Aq Ar device-id .Nm .Ic nsid .Aq Ar device-id | Ar namespace-id .Nm .Ic resv acquire .Aq Fl c Ar crkey .Op Fl p Ar prkey .Aq Fl t Ar rtype .Aq Fl a Ar racqa .Aq Ar namespace-id .Nm .Ic resv register .Op Fl i .Op Fl c Ar crkey .Aq Fl k Ar nrkey .Aq Fl r Ar rrega .Op Fl p Ar cptpl .Aq Ar namespace-id .Nm .Ic resv release .Aq Fl c Ar crkey .Aq Fl t Ar rtype .Aq Fl a Ar rrela .Aq Ar namespace-id .Nm .Ic resv report .Op Fl e .Op Fl v .Op Fl x .Aq Ar namespace-id .Nm .Ic firmware .Op Fl s Ar slot .Op Fl f Ar path_to_firmware .Op Fl a .Aq Ar device-id .Nm .Ic format .Op Fl f Ar fmt .Op Fl m Ar mset .Op Fl p Ar pi .Op Fl l Ar pil .Op Fl E .Op Fl C .Aq Ar device-id | Ar namespace-id .Nm .Ic sanitize .Aq Fl a Ar sanact .Op Fl c Ar owpass .Op Fl d .Op Fl p Ar ovrpat .Op Fl r .Op Fl I .Op Fl U .Aq Ar device-id .Nm .Ic 
power .Op Fl l .Op Fl p power_state .Op Fl w workload_hint .Nm .Ic selftest .Aq Fl c Ar code .Aq Ar device-id | Ar namespace-id .Nm .Ic wdc cap-diag .Op Fl o path_template .Aq Ar device-id .Nm .Ic wdc drive-log .Op Fl o path_template .Aq Ar device-id .Nm .Ic wdc get-crash-dump .Op Fl o path_template .Aq Ar device-id .\" .Nm .\" .Ic wdc purge .\" .Aq device-id .\" .Nm .\" .Ic wdc purge-monitor .\" .Aq device-id .Nm .Ic admin-passthru .Op args .Aq Ar device-id .Nm .Ic io-passthru .Op args .Aq Ar namespace-id +.Nm +.Ic discover +.Op Fl v +.Op Fl t Ar transport +.Op Fl q Ar HostNQN +.Nm +.Ic connect +.Op Fl FGg +.Op Fl c Ar cntl-id +.Op Fl i Ar queues +.Op Fl k Ar seconds +.Op Fl t Ar transport +.Op Fl q Ar HostNQN +.Op Fl Q Ar entries +.Aq Ar address +.Aq Ar SubNQN +.Nm +.Ic connect-all +.Op Fl FGg +.Op Fl i Ar queues +.Op Fl k Ar seconds +.Op Fl t Ar transport +.Op Fl q Ar HostNQN +.Op Fl Q Ar entries +.Aq Ar address +.Nm +.Ic disconnect +.Aq Ar device-id | Ar namespace-id | Ar SubNQN +.Nm +.Ic reconnect +.Op Fl FGg +.Op Fl i Ar queues +.Op Fl k Ar seconds +.Op Fl t Ar transport +.Op Fl q Ar HostNQN +.Op Fl Q Ar entries +.Aq Ar device-id +.Aq Ar address .Sh DESCRIPTION -NVM Express (NVMe) is a storage protocol standard, for SSDs and other -high-speed storage devices over PCI Express. +NVM Express (NVMe) is a storage protocol standard for SSDs and other +high-speed storage devices over PCI Express as well as remote storage +devices accessed via a network fabric. .Ss devlist List all NVMe controllers and namespaces along with their device nodes. With the .Fl h argument, use unit suffixes: Byte, Kibibyte, Mebibyte, Gibibyte, Tebibyte and Pebibyte (based on powers of 1024) when showing the disk space. By default, uses Mebibyte. .Ss identify The identify commands reports information from the drive's .Dv IDENTIFY_CONTROLLER if a .Ar device-id is specified. It reports .Dv IDENTIFY_NAMESPACE data if a .Ar namespace-id is specified. 
When used with disk names, the .Dv IDENTIFY_NAMESPACE data is reported, unless the namespace .Ar nsid is overridden with the .Fl n flag. Then that namespace's data is reported, if it exists. The command accepts the following parameters: .Bl -tag -width 6n .It Fl n The namespace .Aq nsid to use instead of the namespace associated with the device. A .Ar nsid of .Dq 0 is used to retrieve the .Dv IDENTIFY_CONTROLLER data associated with that drive. .El .Ss logpage The logpage command knows how to print log pages of various types. It also knows about vendor specific log pages from hgst/wdc, samsung and intel. Note that some vendors use the same log page numbers for different data. .Pp .Bl -tag -compact -width "Page 0x00" .It Dv Page 0x01 Drive Error Log .It Dv Page 0x02 Health/SMART Data .It Dv Page 0x03 Firmware Information .It Dv Page 0x04 Changed Namespace List .It Dv Page 0x05 Commands Supported and Effects .It Dv Page 0x06 Device Self-test .It Dv Page 0x80 Reservation Notification .It Dv Page 0x81 Sanitize Status .It Dv Page 0xc1 Advanced SMART information (WDC/HGST) .It Dv Page 0xc1 Read latency stats (Intel) .It Dv Page 0xc2 Wite latency stats (Intel) .It Dv Page 0xc5 Temperature stats (Intel) .It Dv Page 0xca Advanced SMART information (Intel) .It Dv Page 0xca Extended SMART information (Samsung) .El .Pp Specifying .Fl v .Ic help will list all valid vendors and pages. .Fl x will print the page as hex. .Fl b will print the binary data for the page. .Fl s will set Log Specific Field. .Fl i will set Log Specific Identifier. .Fl r will set Retain Asynchronous Event. .Ss ns Various namespace management commands. If namespace management is supported by device, allow list, create and delete namespaces, list, attach and detach controllers to namespaces. Each NVM device consists of one or more NVM subsystems. Each NVM subsystem has one or more NVM ports. Each NVM port is attached to one or more NVM controllers (though typically 1). 
Each NVM controller is attached to one or more namespaces. .Pp After a namespace is created, it is considered .Dq allocated . All namespaces that have not been created are unallocated. An allocated namespace may be active or inactive. An active namespace is attached to the controller and may be interacted with. A namespace can move from active to inactive when detached. An allocated namespace may be deleted to become unallocated. For more details on the nuances of NVM namespaces, please see section 2 .Em Theory of Operation and section 3 .Em NVM Express Architecture of the latest NVM standard. .Ss ns active Provide a list of active namespace identifiers for the givne NVM controller. .Ss ns allocated Provide a list of allocated namespace identifiers for the givne NVM controller. .Ss ns attach Attach an nsid to a controller. The primary controller is used if one is not specified. .Ss ns attached Provide a list of controllers attached to a nsid. If only a nvme controller argument is provided, a nsid must also be specified. .Ss ns controllers Provide a list of all controllers in the NVM subsystem. .Ss ns create Creates a new namespace. .Ss ns delete Delete a namespace. It must be currently inactive. .Ss ns detach Detach a namespace from a controller. The namespace will become inaccessible, but its contents will remain if it is .Em activated again. .Ss ns identify Print detailed information about the namespace. .Ss nsid Reports the namespace id and controller device associated with the .Aq Ar namespace-id or .Aq Ar device-id argument. .Ss resv acquire Acquire or preempt namespace reservation, using specified parameters: .Bl -tag -width 6n .It Fl a Acquire action: .Bl -tag -compact -width 6n .It Dv 0 Acquire .It Dv 1 Preempt .It Dv 2 Preempt and abort .El .It Fl c Current reservation key. .It Fl p Preempt reservation key. 
.It Fl t Reservation type: .Bl -tag -compact -width 6n .It Dv 1 Write Exclusive .It Dv 2 Exclusive Access .It Dv 3 Write Exclusive - Registrants Only .It Dv 4 Exclusive Access - Registrants Only .It Dv 5 Write Exclusive - All Registrants .It Dv 6 Exclusive Access - All Registrants .El .El .Ss resv register Register, unregister or replace reservation key, using specified parameters: .Bl -tag -width 6n .It Fl c Current reservation key. .It Fl k New reservation key. .It Fl r Register action: .Bl -tag -compact -width 6n .It Dv 0 Register .It Dv 1 Unregister .It Dv 2 Replace .El .It Fl i Ignore Existing Key .It Fl p Change Persist Through Power Loss State: .Bl -tag -compact -width 6n .It Dv 0 No change to PTPL state .It Dv 2 Set PTPL state to ‘0’. Reservations are released and registrants are cleared on a power on. .It Dv 3 Set PTPL state to ‘1’. Reservations and registrants persist across a power loss. .El .El .Ss resv release Release or clear reservation, using specified parameters: .Bl -tag -width 6n .It Fl c Current reservation key. .It Fl t Reservation type. .It Fl a Release action: .Bl -tag -compact -width 6n .It Dv 0 Release .It Dv 1 Clean .El .El .Ss resv report Print reservation status, using specified parameters: .Bl -tag -width 6n .It Fl x Print reservation status in hex. .It Fl e Use Extended Data Structure. .El .Ss format Format either specified namespace, or all namespaces of specified controller, using specified parameters: .Bl -tag -width 8n .It Fl f Ar fmt The index .Ar fmt of the parameters to use. LBA Format #, as specified in the identification of the namespace using .Dq nvmecontrol identify command with a namespace specified maps this index into these parameters. .It Fl m Ar mset Metadata Setting. .Ar mset .Bl -tag -compact -width 6n .It Dv 0 do not transfer metadata with LBA information .It Dv 1 Transfer the metadata as part of the extended LBA information. .El .It Fl p Ar pi Protection Information. 
.Bl -tag -compact -width 6n .It Dv 0 Protection Information not enabled. .It Dv 1 Type 1 information protection enabled. .It Dv 2 Type 2 information protection enabled. .It Dv 3 Type 3 information protection enabled. .El .It Fl l Ar pil Protection Information Location. .Bl -tag -compact -width 6n .It Dv 0 Transfer the protection metadata as the last N bytes of the transfer. .It Dv 1 Transfer the protection metadata as the first N bytes of the transfer. .El .It Fl E Enables User Data Erase during format. All users data is erased and subsequent reads are indeterminate. The drive may implement this as a cryptographic erase or it may physically erase the underlying media. .It Fl C Enables Cryptographic Erase during format. All user data is erased cryptographically by deleting the encryption key, rendering it unintelligible. .El .Pp When formatting specific namespace, existing values are used as defaults. When formatting all namespaces, all parameters should be specified. Some controllers may not support formatting or erasing specific or all namespaces. The .Xr nvme 4 driver does not currently support metadata and protection information transfers. .Ss sanitize Sanitize NVM subsystem of specified controller, using specified parameters: .Bl -tag -width 6n .It Fl a Ar operation Specify the sanitize operation to perform. .Bl -tag -width 16n .It overwrite Perform an overwrite operation by writing a user supplied data pattern to the device one or more times. The pattern is given by the .Fl p argument. The number of times is given by the .Fl c argument. .It block Perform a block erase operation. All the device's blocks are set to a vendor defined value, typically zero. .It crypto Perform a cryptographic erase operation. The encryption keys are changed to prevent the decryption of the data. .It exitfailure Exits a previously failed sanitize operation. 
A failed sanitize operation can only be exited if it was run in the unrestricted completion mode, as provided by the .Fl U argument. .El .It Fl c Ar passes The number of passes when performing an .Sq overwrite operation. Valid values are between 1 and 16. The default is 1. .It Fl d No Deallocate After Sanitize. .It Fl I When performing an .Sq overwrite operation, the pattern is inverted between consecutive passes. .It Fl p Ar pattern 32 bits of pattern to use when performing an .Sq overwrite operation. The pattern is repeated as needed to fill each block. .It Fl U Perform the sanitize in the unrestricted completion mode. If the operation fails, it can later be exited with the .Sq exitfailure operation. .It Fl r Run in .Dq report only mode. This will report status on a sanitize that is already running on the drive. .El .Ss power Manage the power modes of the NVMe controller. .Bl -tag -width 6n .It Fl l List all supported power modes. .It Fl p Ar mode Set the power mode to .Ar mode . This must be a mode listed with the .Dl nvmecontrol power -l command. .It Fl w Ar hint Set the workload hint for automatic power mode control. .Bl -tag -compact -width 6n .It 0 No workload hint is provided. .It 1 Extended idle period workload. The device is often idle for minutes at a time. A burst of write commands comes in over a period of seconds. Then the device returns to being idle. .It 2 Heavy sequential writes. A huge number of sequential writes will be submitted, filling the submission queues. .It Other All other values are reserved and have no standard meaning. .El Please see the .Dq NVM Subsystem Workloads section of the relevant NVM Express Base Standard for details. .El .Ss selftest Start the specified device self-test: .Bl -tag -width 6n .It Fl c Ar code Specify the device self-test command code. 
Common codes are: .Bl -tag -compact -width 6n .It Dv 0x1 Start a short device self-test operation .It Dv 0x2 Start an extended device self-test operation .It Dv 0xe Start a vendor specific device self-test operation .It Dv 0xf Abort the device self-test operation .El .El .Ss wdc The various wdc command retrieve log data from the wdc/hgst drives. The .Fl o flag specifies a path template to use to output the files. Each file takes the path template (which defaults to nothing), appends the drive's serial number and the type of dump it is followed by .bin. These logs must be sent to the vendor for analysis. This tool only provides a way to extract them. .Ss passthru The .Dq admin-passthru and .Dq io-passthru commands send NVMe commands to either the administrative or the data part of the device. These commands are expected to be compatible with nvme-cli. Please see the NVM Express Base Standard for details. .Bl -tag -width 16n .It Fl o -opcode Ar opcode Opcode to send. .It Fl 2 -cdw2 Ar value 32-bit value for CDW2. .It Fl 3 -cdw3 Ar value 32-bit value for CDW3. .It Fl 4 -cdw10 Ar value 32-bit value for CDW10. .It Fl 5 -cdw11 Ar value 32-bit value for CDW11. .It Fl 6 -cdw12 Ar value 32-bit value for CDW12. .It Fl 7 -cdw13 Ar value 32-bit value for CDW13. .It Fl 8 -cdw14 Ar value 32-bit value for CDW14. .It Fl 9 -cdw15 Ar value 32-bit value for CDW15. .It Fl l -data-len Length of the data for I/O (bytes). .It Fl m -metadata-len Length of the metadata segment for command (bytes). This is ignored and not implemented in .Xr nvme 4 . .It Fl f -flags Nvme command flags. .It Fl n -namespace-id Namespace ID for command (Ignored). .It Fl p -prefill Value to prefill payload with. .It Fl b -raw-binary Output in binary format (otherwise a hex dump is produced). .It Fl d -dry-run Do not actually execute the command, but perform sanity checks on it. .It Fl r -read Command reads data from the device. .It Fl s -show-command Show all the command values on stdout. 
.It Fl w -write Command writes data to the device. .El .Pp Send arbitrary commands to the device. Can be used to extract vendor specific logs. Transfers to/from the device possible, but limited to .Dv MAXPHYS bytes. Commands either read data or write it, but not both. Commands needing metadata are not supported by the .Xr nvme 4 drive. +.Ss discover +List the remote controllers advertised by a remote Discovery Controller: +.Bl -tag -width 6n +.It Fl t Ar transport +Transport to use. +The default is +.Ar tcp . +.It Fl q Ar HostNQN +NVMe Qualified Name to use for this host. +By default an NQN is auto-generated from the current host's UUID. +.It Fl v +Display the +.Dv IDENTIFY_CONTROLLER +data for the Discovery Controller. +.El +.Ss connect +Establish an association with the I/O controller named +.Ar SubNQN +at +.Ar address . +The address must include a port. +.Pp +An admin queue pair and one or more I/O queue pairs are created and handed +off to the kernel to create a new controller device. +.Bl -tag -width 6n +.It Fl c Ar cntl-id +Remote controller ID to request: +.Bl -tag +.It dynamic +Request a dynamic controller ID for controllers using the dynamic +controller model. +This is the default. +.It static +Request a dynamic controller ID for controllers using the static +controller model. +.It Ar number +Request a specific controller ID for controllers using the static +controller model. +.El +.It Fl F +Request submission queue flow control. +By default submission queue flow control is disabled unless the remote +controller requires it. +.It Fl g +Enable TCP PDU header digests. +.It Fl G +Enable TCP PDU data digests. +.It Fl i Ar queues +Number of I/O queue pairs to create. +The default is 1. +.It Fl k Ar seconds +Keep Alive timer duration in seconds. +The default is 120. +.It Fl t Ar transport +Transport to use. +The default is +.Ar tcp . +.It Fl q Ar HostNQN +NVMe Qualified Name to use for this host. +By default an NQN is auto-generated from the current host's UUID.
+.It Fl Q Ar entries +Number of entries in each I/O queue. +By default the maximum queue size reported by the MQES field +of the remote host's CAP property is used. +.El +.Ss connect-all +Query the Discovery Controller at +.Ar address +and establish an association for each advertised I/O controller. +The +.Fl t +flag determines the transport used for the initial association with +the Discovery Controller and defaults to +.Ar tcp . +All other flags are used to control properties of each I/O association as +described above for the +.Cm connect +command. +.Ss disconnect +Delete the controller device associated with a remote I/O controller +including any active association and open queues. +.Ss reconnect +Reestablish an association for the remote I/O controller associated with +.Ar device-id +at +.Ar address . +The address must include a port. +The flags have the same meaning for the new association as described above +for the +.Cm connect +command. .Sh DEVICE NAMES Where .Aq Ar namespace-id is required, you can use either the .Pa nvmeXnsY device, or the disk device such as .Pa ndaZ or .Pa nvdZ . The leading .Pa /dev/ may be omitted. Where .Aq Ar device-id is required, you can use either the .Pa nvmeX device, or the disk device such as .Pa ndaZ or .Pa nvdZ . For commands that take an optional .Aq nsid you can use it to get information on other namespaces, or to query the drive itself. A .Aq nsid of .Dq 0 means query the drive itself.
+.Sh FABRICS TRANSPORTS +The following NVM Express over Fabrics transports are supported for +accessing remote controllers: +.Bl -tag +.It tcp +TCP transport +.El +.Sh NETWORK ADDRESSES +Network addresses for remote controllers can use one of the following formats: +.Bl -bullet +.It +.Bq Ar IPv6 address +.Ns : Ns Ar port +.It +.Ar IPv4 address +.Ns : Ns Ar port +.It +.Ar hostname Ns : Ns Ar port +.It +.Bq Ar IPv6 address +.It +.Ar IPv6 address +.It +.Ar IPv4 address +.It +.Ar hostname +.El +.Pp +If a +.Ar port +is not provided, a default value is used if possible. .Sh EXAMPLES .Dl nvmecontrol devlist .Pp Display a list of NVMe controllers and namespaces along with their device nodes. .Pp .Dl nvmecontrol identify nvme0 .Dl nvmecontrol identify -n 0 nvd0 .Pp Display a human-readable summary of the nvme0 .Dv IDENTIFY_CONTROLLER data. In this example, nvd0 is connected to nvme0. .Pp .Dl nvmecontrol identify -x -v nvme0ns1 .Dl nvmecontrol identify -x -v -n 1 nvme0 .Pp Display an hexadecimal dump of the nvme0 .Dv IDENTIFY_NAMESPACE data for namespace 1. .Pp .Dl nvmecontrol perftest -n 32 -o read -s 512 -t 30 nvme0ns1 .Pp Run a performance test on nvme0ns1 using 32 kernel threads for 30 seconds. Each thread will issue a single 512 byte read command. Results are printed to stdout when 30 seconds expires. .Pp .Dl nvmecontrol reset nvme0 .Dl nvmecontrol reset nda4 .Pp Perform a controller-level reset of the nvme0 controller. In this example, nda4 is wired to nvme0. .Pp .Dl nvmecontrol logpage -p 1 nvme0 .Pp Display a human-readable summary of the nvme0 controller's Error Information Log. Log pages defined by the NVMe specification include Error Information Log (ID=1), SMART/Health Information Log (ID=2), and Firmware Slot Log (ID=3). .Pp .Dl nvmecontrol logpage -p 0xc1 -v wdc nvme0 .Pp Display a human-readable summary of the nvme0's wdc-specific advanced SMART data. 
.Pp .Dl nvmecontrol logpage -p 1 -x nvme0 .Pp Display a hexadecimal dump of the nvme0 controller's Error Information Log. .Pp .Dl nvmecontrol logpage -p 0xcb -b nvme0 > /tmp/page-cb.bin .Pp Print the contents of vendor specific page 0xcb as binary data on standard out. Redirect it to a temporary file. .Pp .Dl nvmecontrol firmware -s 2 -f /tmp/nvme_firmware nvme0 .Pp Download the firmware image contained in "/tmp/nvme_firmware" to slot 2 of the nvme0 controller, but do not activate the image. .Pp .Dl nvmecontrol firmware -s 4 -a nvme0 .Pp Activate the firmware in slot 4 of the nvme0 controller on the next reset. .Pp .Dl nvmecontrol firmware -s 7 -f /tmp/nvme_firmware -a nvme0 .Pp Download the firmware image contained in "/tmp/nvme_firmware" to slot 7 of the nvme0 controller and activate it on the next reset. .Pp .Dl nvmecontrol power -l nvme0 .Pp List all the current power modes. .Pp .Dl nvmecontrol power -p 3 nvme0 .Pp Set the current power mode. .Pp .Dl nvmecontrol power nvme0 .Pp Get the current power mode. .Pp .Dl nvmecontrol identify -n 0 nda0 .Pp Identify the drive data associated with the .Pa nda0 device. The corresponding .Pa nvmeX devices is used automatically. .Pp .Dl nvmecontrol identify nda0 .Pp Get the namespace parameters associated with the .Pa nda0 device. The corresponding .Pa nvmeXnsY device is used automatically. .Pp .Dl nvmecontrol format -f 2 -m 0 -p 0 -l 0 -C nvme2 .Pp Format all the name spaces on nvme2 using parameters from .Dq LBA Format #2 with no metadata or protection data using cryptographic erase. 
If the .Dq nvmecontrol identify -n 1 nvme2 command ended with .Pp .Bl -verbatim LBA Format #00: Data Size: 512 Metadata Size: 0 Performance: Good LBA Format #01: Data Size: 512 Metadata Size: 8 Performance: Good LBA Format #02: Data Size: 4096 Metadata Size: 0 Performance: Good LBA Format #03: Data Size: 4096 Metadata Size: 8 Performance: Good LBA Format #04: Data Size: 4096 Metadata Size: 64 Performance: Good .El .Pp then this would give a 4k data format for at least namespace 1, with no metadata. .Pp .Sh DYNAMIC LOADING The directories .Pa /lib/nvmecontrol and .Pa /usr/local/lib/nvmecontrol are scanned for any .so files. These files are loaded. The members of the .Va top linker set are added to the top-level commands. The members of the .Va logpage linker set are added to the logpage parsers. .Sh SEE ALSO .Rs .%T The NVM Express Base Specification .%D June 10, 2019 .%U https://nvmexpress.org/wp-content/uploads/NVM-Express-1_4-2019.06.10-Ratified.pdf .Re .Sh HISTORY The .Nm utility appeared in .Fx 9.2 . .Sh AUTHORS .An -nosplit .Nm was developed by Intel and originally written by .An Jim Harris Aq Mt jimharris@FreeBSD.org . .Pp This man page was written by .An Jim Harris Aq Mt jimharris@FreeBSD.org . diff --git a/sbin/nvmecontrol/reconnect.c b/sbin/nvmecontrol/reconnect.c new file mode 100644 index 000000000000..c8a010c038d0 --- /dev/null +++ b/sbin/nvmecontrol/reconnect.c @@ -0,0 +1,167 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2024 Chelsio Communications, Inc. + * Written by: John Baldwin + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "nvmecontrol.h" +#include "fabrics.h" + +/* + * See comment about other possible settings in connect.c. 
+ */ + +static struct options { + const char *dev; + const char *transport; + const char *address; + const char *hostnqn; + uint32_t kato; + uint16_t num_io_queues; + uint16_t queue_size; + bool data_digests; + bool flow_control; + bool header_digests; +} opt = { + .dev = NULL, + .transport = "tcp", + .address = NULL, + .hostnqn = NULL, + .kato = NVMF_KATO_DEFAULT / 1000, + .num_io_queues = 1, + .queue_size = 0, + .data_digests = false, + .flow_control = false, + .header_digests = false, +}; + +static void +tcp_association_params(struct nvmf_association_params *params) +{ + params->tcp.pda = 0; + params->tcp.header_digests = opt.header_digests; + params->tcp.data_digests = opt.data_digests; + /* XXX */ + params->tcp.maxr2t = 1; +} + +static int +reconnect_nvm_controller(int fd, enum nvmf_trtype trtype, int adrfam, + const char *address, const char *port) +{ + struct nvme_controller_data cdata; + struct nvmf_association_params aparams; + struct nvmf_reconnect_params rparams; + struct nvmf_qpair *admin, **io; + int error; + + error = nvmf_reconnect_params(fd, &rparams); + if (error != 0) { + warnc(error, "Failed to fetch reconnect parameters"); + return (EX_IOERR); + } + + memset(&aparams, 0, sizeof(aparams)); + aparams.sq_flow_control = opt.flow_control; + switch (trtype) { + case NVMF_TRTYPE_TCP: + tcp_association_params(&aparams); + break; + default: + warnx("Unsupported transport %s", nvmf_transport_type(trtype)); + return (EX_UNAVAILABLE); + } + + io = calloc(opt.num_io_queues, sizeof(*io)); + error = connect_nvm_queues(&aparams, trtype, adrfam, address, port, + rparams.cntlid, rparams.subnqn, opt.hostnqn, opt.kato, &admin, io, + opt.num_io_queues, opt.queue_size, &cdata); + if (error != 0) + return (error); + + error = nvmf_reconnect_host(fd, admin, opt.num_io_queues, io, &cdata); + if (error != 0) { + warnc(error, "Failed to handoff queues to kernel"); + return (EX_IOERR); + } + free(io); + return (0); +} + +static void +reconnect_fn(const struct cmd *f, int 
argc, char *argv[]) +{ + enum nvmf_trtype trtype; + const char *address, *port; + char *tofree; + int error, fd; + + if (arg_parse(argc, argv, f)) + return; + + if (strcasecmp(opt.transport, "tcp") == 0) { + trtype = NVMF_TRTYPE_TCP; + } else + errx(EX_USAGE, "Unsupported or invalid transport"); + + nvmf_parse_address(opt.address, &address, &port, &tofree); + + open_dev(opt.dev, &fd, 1, 1); + if (port == NULL) + errx(EX_USAGE, "Explicit port required"); + + error = reconnect_nvm_controller(fd, trtype, AF_UNSPEC, address, port); + if (error != 0) + exit(error); + + close(fd); + free(tofree); +} + +static const struct opts reconnect_opts[] = { +#define OPT(l, s, t, opt, addr, desc) { l, s, t, &opt.addr, desc } + OPT("transport", 't', arg_string, opt, transport, + "Transport type"), + OPT("nr-io-queues", 'i', arg_uint16, opt, num_io_queues, + "Number of I/O queues"), + OPT("queue-size", 'Q', arg_uint16, opt, queue_size, + "Number of entries in each I/O queue"), + OPT("keep-alive-tmo", 'k', arg_uint32, opt, kato, + "Keep Alive timeout (in seconds)"), + OPT("hostnqn", 'q', arg_string, opt, hostnqn, + "Host NQN"), + OPT("flow_control", 'F', arg_none, opt, flow_control, + "Request SQ flow control"), + OPT("hdr_digests", 'g', arg_none, opt, header_digests, + "Enable TCP PDU header digests"), + OPT("data_digests", 'G', arg_none, opt, data_digests, + "Enable TCP PDU data digests"), + { NULL, 0, arg_none, NULL, NULL } +}; +#undef OPT + +static const struct args reconnect_args[] = { + { arg_string, &opt.dev, "controller-id" }, + { arg_string, &opt.address, "address" }, + { arg_none, NULL, NULL }, +}; + +static struct cmd reconnect_cmd = { + .name = "reconnect", + .fn = reconnect_fn, + .descr = "Reconnect to a fabrics controller", + .ctx_size = sizeof(opt), + .opts = reconnect_opts, + .args = reconnect_args, +}; + +CMD_COMMAND(reconnect_cmd);