diff --git a/sbin/nvmecontrol/Makefile b/sbin/nvmecontrol/Makefile
--- a/sbin/nvmecontrol/Makefile
+++ b/sbin/nvmecontrol/Makefile
@@ -3,7 +3,11 @@
 PACKAGE=nvme-tools
 PROG=	nvmecontrol
 SRCS+=	comnd.c
+SRCS+=	connect.c
 SRCS+=	devlist.c
+SRCS+=	disconnect.c
+SRCS+=	discover.c
+SRCS+=	fabrics.c
 SRCS+=	firmware.c
 SRCS+=	format.c
 SRCS+=	identify.c
@@ -17,13 +21,15 @@
 SRCS+=	passthru.c
 SRCS+=	perftest.c
 SRCS+=	power.c
+SRCS+=	reconnect.c
 SRCS+=	reset.c
 SRCS+=	resv.c
 SRCS+=	sanitize.c
 SRCS+=	selftest.c
+CFLAGS+= -I${SRCTOP}/lib/libnvmf
 MAN=	nvmecontrol.8
 LDFLAGS+= -rdynamic
-LIBADD+= util
+LIBADD+= nvmf util
 SUBDIR=	modules
 HAS_TESTS=
 SUBDIR.${MK_TESTS}+= tests
diff --git a/sbin/nvmecontrol/connect.c b/sbin/nvmecontrol/connect.c
new file mode 100644
--- /dev/null
+++ b/sbin/nvmecontrol/connect.c
@@ -0,0 +1,295 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023-2024 Chelsio Communications, Inc.
+ * Written by: John Baldwin
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "comnd.h"
+#include "fabrics.h"
+
+/*
+ * Settings that are currently hardcoded but could be exposed to the
+ * user via additional command line options:
+ *
+ * - ADMIN queue entries
+ * - MaxR2T
+ */
+
+static struct options {
+	const char *transport;
+	const char *address;
+	const char *cntlid;
+	const char *subnqn;
+	const char *hostnqn;
+	uint32_t kato;
+	uint16_t num_io_queues;
+	uint16_t queue_size;
+	bool data_digests;
+	bool flow_control;
+	bool header_digests;
+} opt = {
+	.transport = "tcp",
+	.address = NULL,
+	.cntlid = "dynamic",
+	.subnqn = NULL,
+	.hostnqn = NULL,
+	.kato = NVMF_KATO_DEFAULT / 1000,
+	.num_io_queues = 1,
+	.queue_size = 0,
+	.data_digests = false,
+	.flow_control = false,
+	.header_digests = false,
+};
+
+static void
+tcp_association_params(struct nvmf_association_params *params)
+{
+	params->tcp.pda = 0;
+	params->tcp.header_digests = opt.header_digests;
+	params->tcp.data_digests = opt.data_digests;
+	/* XXX */
+	params->tcp.maxr2t = 1;
+}
+
+/*
+ * Connect to a single I/O controller and hand its queues off to the
+ * kernel.  Returns 0 on success or a <sysexits.h> exit status on
+ * failure.  The array of I/O qpair pointers is released on every path.
+ */
+static int
+connect_nvm_controller(enum nvmf_trtype trtype, int adrfam, const char *address,
+    const char *port, uint16_t cntlid, const char *subnqn)
+{
+	struct nvme_controller_data cdata;
+	struct nvmf_association_params aparams;
+	struct nvmf_qpair *admin, **io;
+	int error;
+
+	memset(&aparams, 0, sizeof(aparams));
+	aparams.sq_flow_control = opt.flow_control;
+	switch (trtype) {
+	case NVMF_TRTYPE_TCP:
+		tcp_association_params(&aparams);
+		break;
+	default:
+		warnx("Unsupported transport %s", nvmf_transport_type(trtype));
+		return (EX_UNAVAILABLE);
+	}
+
+	io = calloc(opt.num_io_queues, sizeof(*io));
+	if (io == NULL) {
+		warn("Failed to allocate I/O queue array");
+		return (EX_OSERR);
+	}
+	error = connect_nvm_queues(&aparams, trtype, adrfam, address, port,
+	    cntlid, subnqn, opt.hostnqn, opt.kato, &admin, io,
+	    opt.num_io_queues, opt.queue_size, &cdata);
+	if (error != 0) {
+		free(io);
+		return (error);
+	}
+
+	error = nvmf_handoff_host(admin, opt.num_io_queues, io, &cdata);
+	if (error != 0) {
+		warnc(error, "Failed to handoff queues to kernel");
+		free(io);
+		return (EX_IOERR);
+	}
+	free(io);
+	return (0);
+}
+
+static void
+connect_discovery_entry(struct nvme_discovery_log_entry *entry)
+{
+	int adrfam;
+
+	switch (entry->trtype) {
+	case NVMF_TRTYPE_TCP:
+		switch (entry->adrfam) {
+		case NVMF_ADRFAM_IPV4:
+			adrfam = AF_INET;
+			break;
+		case NVMF_ADRFAM_IPV6:
+			adrfam = AF_INET6;
+			break;
+		default:
+			warnx("Skipping unsupported address family for %s",
+			    entry->subnqn);
+			return;
+		}
+		switch (entry->tsas.tcp.sectype) {
+		case NVME_TCP_SECURITY_NONE:
+			break;
+		default:
+			warnx("Skipping unsupported TCP security type for %s",
+			    entry->subnqn);
+			return;
+		}
+		break;
+	default:
+		warnx("Skipping unsupported transport %s for %s",
+		    nvmf_transport_type(entry->trtype), entry->subnqn);
+		return;
+	}
+
+	/*
+	 * XXX: Track portids and avoid duplicate connections for a
+	 * given (subnqn,portid)?
+	 */
+
+	/* XXX: Should this make use of entry->aqsz in some way? */
+	connect_nvm_controller(entry->trtype, adrfam, entry->traddr,
+	    entry->trsvcid, entry->cntlid, entry->subnqn);
+}
+
+static void
+connect_discovery_log_page(struct nvmf_qpair *qp)
+{
+	struct nvme_discovery_log *log;
+	int error;
+
+	error = nvmf_host_fetch_discovery_log_page(qp, &log);
+	if (error != 0)
+		errc(EX_IOERR, error, "Failed to fetch discovery log page");
+
+	for (u_int i = 0; i < log->numrec; i++)
+		connect_discovery_entry(&log->entries[i]);
+	free(log);
+}
+
+static void
+discover_controllers(enum nvmf_trtype trtype, const char *address,
+    const char *port)
+{
+	struct nvmf_qpair *qp;
+
+	qp = connect_discovery_adminq(trtype, address, port, opt.hostnqn);
+
+	connect_discovery_log_page(qp);
+
+	nvmf_free_qpair(qp);
+}
+
+static void
+connect_fn(const struct cmd *f, int argc, char *argv[])
+{
+	enum nvmf_trtype trtype;
+	const char *address, *port;
+	char *tofree;
+	u_long cntlid;
+	int error;
+
+	if (arg_parse(argc, argv, f))
+		return;
+
+	if (opt.num_io_queues <= 0)
+		errx(EX_USAGE, "Invalid number of I/O queues");
+
+	if (strcasecmp(opt.transport, "tcp") == 0) {
+		trtype = NVMF_TRTYPE_TCP;
+	} else
+		errx(EX_USAGE, "Unsupported or invalid transport");
+
+	nvmf_parse_address(opt.address, &address, &port, &tofree);
+	if (port == NULL)
+		errx(EX_USAGE, "Explicit port required");
+
+	cntlid = nvmf_parse_cntlid(opt.cntlid);
+
+	error = connect_nvm_controller(trtype, AF_UNSPEC, address, port, cntlid,
+	    opt.subnqn);
+	if (error != 0)
+		exit(error);
+
+	free(tofree);
+}
+
+static void
+connect_all_fn(const struct cmd *f, int argc, char *argv[])
+{
+	enum nvmf_trtype trtype;
+	const char *address, *port;
+	char *tofree;
+
+	if (arg_parse(argc, argv, f))
+		return;
+
+	if (opt.num_io_queues <= 0)
+		errx(EX_USAGE, "Invalid number of I/O queues");
+
+	if (strcasecmp(opt.transport, "tcp") == 0) {
+		trtype = NVMF_TRTYPE_TCP;
+	} else
+		errx(EX_USAGE, "Unsupported or invalid transport");
+
+	nvmf_parse_address(opt.address, &address, &port, &tofree);
+	discover_controllers(trtype, address, port);
+
+	free(tofree);
+}
+
+static const struct opts connect_opts[] = {
+#define OPT(l, s, t, opt, addr, desc) { l, s, t, &opt.addr, desc }
+	OPT("transport", 't', arg_string, opt, transport,
+	    "Transport type"),
+	OPT("cntlid", 'c', arg_string, opt, cntlid,
+	    "Controller ID"),
+	OPT("nr-io-queues", 'i', arg_uint16, opt, num_io_queues,
+	    "Number of I/O queues"),
+	OPT("queue-size", 'Q', arg_uint16, opt, queue_size,
+	    "Number of entries in each I/O queue"),
+	OPT("keep-alive-tmo", 'k', arg_uint32, opt, kato,
+	    "Keep Alive timeout (in seconds)"),
+	OPT("hostnqn", 'q', arg_string, opt, hostnqn,
+	    "Host NQN"),
+	OPT("flow_control", 'F', arg_none, opt, flow_control,
+	    "Request SQ flow control"),
+	OPT("hdr_digests", 'g', arg_none, opt, header_digests,
+	    "Enable TCP PDU header digests"),
+	OPT("data_digests", 'G', arg_none, opt, data_digests,
+	    "Enable TCP PDU data digests"),
+	{ NULL, 0, arg_none, NULL, NULL }
+};
+#undef OPT
+
+static const struct args connect_args[] = {
+	{ arg_string, &opt.address, "address" },
+	{ arg_string, &opt.subnqn, "SubNQN" },
+	{ arg_none, NULL, NULL },
+};
+
+static const struct args connect_all_args[] = {
+	{ arg_string, &opt.address, "address" },
+	{ arg_none, NULL, NULL },
+};
+
+static struct cmd connect_cmd = {
+	.name = "connect",
+	.fn = connect_fn,
+	.descr = "Connect to a fabrics controller",
+	.ctx_size = sizeof(opt),
+	.opts = connect_opts,
+	.args = connect_args,
+};
+
+static struct cmd connect_all_cmd = {
+	.name = "connect-all",
+	.fn = connect_all_fn,
+	.descr = "Discover and connect to fabrics controllers",
+	.ctx_size = sizeof(opt),
+	.opts = connect_opts,
+	.args = connect_all_args,
+};
+
+CMD_COMMAND(connect_cmd);
+CMD_COMMAND(connect_all_cmd);
diff --git a/sbin/nvmecontrol/disconnect.c b/sbin/nvmecontrol/disconnect.c
new file mode 100644
--- /dev/null
+++ b/sbin/nvmecontrol/disconnect.c
@@ -0,0 +1,82 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 
2023-2024 Chelsio Communications, Inc.
+ * Written by: John Baldwin
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "nvmecontrol.h"
+
+static struct options {
+	const char *dev;
+} opt = {
+	.dev = NULL
+};
+
+static const struct args args[] = {
+	{ arg_string, &opt.dev, "controller-id|namespace-id|SubNQN" },
+	{ arg_none, NULL, NULL },
+};
+
+static void
+disconnect(const struct cmd *f, int argc, char *argv[])
+{
+	int error, fd;
+	char *path;
+
+	if (arg_parse(argc, argv, f))
+		return;
+	if (nvmf_nqn_valid(opt.dev)) {
+		error = nvmf_disconnect_host(opt.dev);
+		if (error != 0)
+			errc(EX_IOERR, error, "failed to disconnect from %s",
+			    opt.dev);
+	} else {
+		open_dev(opt.dev, &fd, 1, 1);
+		get_nsid(fd, &path, NULL);
+		close(fd);
+
+		error = nvmf_disconnect_host(path);
+		if (error != 0)
+			errc(EX_IOERR, error, "failed to disconnect from %s",
+			    path);
+	}
+
+	exit(0);
+}
+
+static void
+disconnect_all(const struct cmd *f __unused, int argc __unused,
+    char *argv[] __unused)
+{
+	int error;
+
+	error = nvmf_disconnect_all();
+	if (error != 0)
+		errc(EX_IOERR, error,
+		    "failed to disconnect from remote controllers");
+
+	exit(0);
+}
+
+static struct cmd disconnect_cmd = {
+	.name = "disconnect",
+	.fn = disconnect,
+	.descr = "Disconnect from a fabrics controller",
+	.args = args,
+};
+
+static struct cmd disconnect_all_cmd = {
+	.name = "disconnect-all",
+	.fn = disconnect_all,
+	.descr = "Disconnect from all fabrics controllers",
+};
+
+CMD_COMMAND(disconnect_cmd);
+CMD_COMMAND(disconnect_all_cmd);
diff --git a/sbin/nvmecontrol/discover.c b/sbin/nvmecontrol/discover.c
new file mode 100644
--- /dev/null
+++ b/sbin/nvmecontrol/discover.c
@@ -0,0 +1,306 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023-2024 Chelsio Communications, Inc.
+ * Written by: John Baldwin
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "comnd.h"
+#include "fabrics.h"
+#include "nvmecontrol_ext.h"
+
+static struct options {
+	const char *transport;
+	const char *address;
+	const char *hostnqn;
+	bool verbose;
+} opt = {
+	.transport = "tcp",
+	.address = NULL,
+	.hostnqn = NULL,
+	.verbose = false,
+};
+
+static void
+identify_controller(struct nvmf_qpair *qp)
+{
+	struct nvme_controller_data cdata;
+	int error;
+
+	error = nvmf_host_identify_controller(qp, &cdata);
+	if (error != 0)
+		errc(EX_IOERR, error, "Failed to fetch controller data");
+	nvme_print_controller(&cdata);
+}
+
+/*
+ * The helpers below translate discovery log fields into display
+ * strings.  Most format unrecognized values into a function-local
+ * static buffer, so that result is only valid until the same helper
+ * is called again.  No newline is included; callers add their own.
+ */
+static const char *
+nvmf_address_family(uint8_t adrfam)
+{
+	static char buf[8];
+
+	switch (adrfam) {
+	case NVMF_ADRFAM_IPV4:
+		return ("AF_INET");
+	case NVMF_ADRFAM_IPV6:
+		return ("AF_INET6");
+	case NVMF_ADRFAM_IB:
+		return ("InfiniBand");
+	case NVMF_ADRFAM_FC:
+		return ("Fibre Channel");
+	case NVMF_ADRFAM_INTRA_HOST:
+		return ("Intra-host");
+	default:
+		snprintf(buf, sizeof(buf), "0x%02x", adrfam);
+		return (buf);
+	}
+}
+
+static const char *
+nvmf_subsystem_type(uint8_t subtype)
+{
+	static char buf[8];
+
+	switch (subtype) {
+	case NVMF_SUBTYPE_DISCOVERY:
+		return ("Discovery");
+	case NVMF_SUBTYPE_NVME:
+		return ("NVMe");
+	default:
+		snprintf(buf, sizeof(buf), "0x%02x", subtype);
+		return (buf);
+	}
+}
+
+static const char *
+nvmf_secure_channel(uint8_t treq)
+{
+	switch (treq & 0x03) {
+	case NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED:
+		return ("Not specified");
+	case NVMF_TREQ_SECURE_CHANNEL_REQUIRED:
+		return ("Required");
+	case NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED:
+		return ("Not required");
+	default:
+		return ("0x03");
+	}
+}
+
+static const char *
+nvmf_controller_id(uint16_t cntlid)
+{
+	static char buf[8];
+
+	switch (cntlid) {
+	case NVMF_CNTLID_DYNAMIC:
+		return ("Dynamic");
+	case NVMF_CNTLID_STATIC_ANY:
+		return ("Static");
+	default:
+		snprintf(buf, sizeof(buf), "%u", cntlid);
+		return (buf);
+	}
+}
+
+static const char *
+nvmf_rdma_service_type(uint8_t qptype)
+{
+	static char buf[8];
+
+	switch (qptype) {
+	case NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED:
+		return ("Reliable connected");
+	case NVMF_RDMA_QPTYPE_RELIABLE_DATAGRAM:
+		return ("Reliable datagram");
+	default:
+		snprintf(buf, sizeof(buf), "0x%02x", qptype);
+		return (buf);
+	}
+}
+
+static const char *
+nvmf_rdma_provider_type(uint8_t prtype)
+{
+	static char buf[8];
+
+	switch (prtype) {
+	case NVMF_RDMA_PRTYPE_NONE:
+		return ("None");
+	case NVMF_RDMA_PRTYPE_IB:
+		return ("InfiniBand");
+	case NVMF_RDMA_PRTYPE_ROCE:
+		return ("RoCE (v1)");
+	case NVMF_RDMA_PRTYPE_ROCE2:
+		return ("RoCE (v2)");
+	case NVMF_RDMA_PRTYPE_IWARP:
+		return ("iWARP");
+	default:
+		snprintf(buf, sizeof(buf), "0x%02x", prtype);
+		return (buf);
+	}
+}
+
+static const char *
+nvmf_rdma_cms(uint8_t cms)
+{
+	static char buf[8];
+
+	switch (cms) {
+	case NVMF_RDMA_CMS_RDMA_CM:
+		return ("RDMA_IP_CM");
+	default:
+		snprintf(buf, sizeof(buf), "0x%02x", cms);
+		return (buf);
+	}
+}
+
+static const char *
+nvmf_tcp_security_type(uint8_t sectype)
+{
+	static char buf[8];
+
+	switch (sectype) {
+	case NVME_TCP_SECURITY_NONE:
+		return ("None");
+	case NVME_TCP_SECURITY_TLS_1_2:
+		return ("TLS 1.2");
+	case NVME_TCP_SECURITY_TLS_1_3:
+		return ("TLS 1.3");
+	default:
+		snprintf(buf, sizeof(buf), "0x%02x", sectype);
+		return (buf);
+	}
+}
+
+static void
+print_discovery_entry(u_int i, struct nvme_discovery_log_entry *entry)
+{
+	printf("Entry %02u\n", i + 1);
+	printf("========\n");
+	printf(" Transport type: %s\n",
+	    nvmf_transport_type(entry->trtype));
+	printf(" Address family: %s\n",
+	    nvmf_address_family(entry->adrfam));
+	printf(" Subsystem type: %s\n",
+	    nvmf_subsystem_type(entry->subtype));
+	printf(" SQ flow control: %s\n",
+	    (entry->treq & (1 << 2)) == 0 ? "required" : "optional");
+	printf(" Secure Channel: %s\n", nvmf_secure_channel(entry->treq));
+	printf(" Port ID: %u\n", entry->portid);
+	printf(" Controller ID: %s\n",
+	    nvmf_controller_id(entry->cntlid));
+	printf(" Max Admin SQ Size: %u\n", entry->aqsz);
+	printf(" Sub NQN: %s\n", entry->subnqn);
+	printf(" Transport address: %s\n", entry->traddr);
+	printf(" Service identifier: %s\n", entry->trsvcid);
+	switch (entry->trtype) {
+	case NVMF_TRTYPE_RDMA:
+		printf(" RDMA Service Type: %s\n",
+		    nvmf_rdma_service_type(entry->tsas.rdma.rdma_qptype));
+		printf(" RDMA Provider Type: %s\n",
+		    nvmf_rdma_provider_type(entry->tsas.rdma.rdma_prtype));
+		printf(" RDMA CMS: %s\n",
+		    nvmf_rdma_cms(entry->tsas.rdma.rdma_cms));
+		printf(" Partition key: %u\n",
+		    entry->tsas.rdma.rdma_pkey);
+		break;
+	case NVMF_TRTYPE_TCP:
+		printf(" Security Type: %s\n",
+		    nvmf_tcp_security_type(entry->tsas.tcp.sectype));
+		break;
+	}
+}
+
+static void
+dump_discovery_log_page(struct nvmf_qpair *qp)
+{
+	struct nvme_discovery_log *log;
+	int error;
+
+	error = nvmf_host_fetch_discovery_log_page(qp, &log);
+	if (error != 0)
+		errc(EX_IOERR, error, "Failed to fetch discovery log page");
+
+	printf("Discovery\n");
+	printf("=========\n");
+	if (log->numrec == 0) {
+		printf("No entries found\n");
+	} else {
+		for (u_int i = 0; i < log->numrec; i++)
+			print_discovery_entry(i, &log->entries[i]);
+	}
+	free(log);
+}
+
+static void
+discover(const struct cmd *f, int argc, char *argv[])
+{
+	enum nvmf_trtype trtype;
+	struct nvmf_qpair *qp;
+	const char *address, *port;
+	char *tofree;
+
+	if (arg_parse(argc, argv, f))
+		return;
+
+	if (strcasecmp(opt.transport, "tcp") == 0) {
+		trtype = NVMF_TRTYPE_TCP;
+	} else
+		errx(EX_USAGE, "Unsupported or invalid transport");
+
+	nvmf_parse_address(opt.address, &address, &port, &tofree);
+	qp = connect_discovery_adminq(trtype, address, port, opt.hostnqn);
+	free(tofree);
+
+	/* Use Identify to fetch controller data */
+	if (opt.verbose) {
+		identify_controller(qp);
+		printf("\n");
+	}
+
+	/* Fetch Log pages */
+	dump_discovery_log_page(qp);
+
+	nvmf_free_qpair(qp);
+}
+
+static const struct opts discover_opts[] = {
+#define OPT(l, s, t, opt, addr, desc) { l, s, t, &opt.addr, desc }
+	OPT("transport", 't', arg_string, opt, transport,
+	    "Transport type"),
+	OPT("hostnqn", 'q', arg_string, opt, hostnqn,
+	    "Host NQN"),
+	OPT("verbose", 'v', arg_none, opt, verbose,
+	    "Display the discovery controller's controller data"),
+	{ NULL, 0, arg_none, NULL, NULL }
+};
+#undef OPT
+
+static const struct args discover_args[] = {
+	{ arg_string, &opt.address, "address" },
+	{ arg_none, NULL, NULL },
+};
+
+static struct cmd discover_cmd = {
+	.name = "discover",
+	.fn = discover,
+	.descr = "List discovery log pages from a fabrics controller",
+	.ctx_size = sizeof(opt),
+	.opts = discover_opts,
+	.args = discover_args,
+};
+
+CMD_COMMAND(discover_cmd);
diff --git a/sbin/nvmecontrol/fabrics.h b/sbin/nvmecontrol/fabrics.h
new file mode 100644
--- /dev/null
+++ b/sbin/nvmecontrol/fabrics.h
@@ -0,0 +1,41 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023-2024 Chelsio Communications, Inc.
+ * Written by: John Baldwin
+ */
+
+#ifndef __FABRICS_H__
+#define __FABRICS_H__
+
+/*
+ * Splits 'in_address' into separate 'address' and 'port' strings.  If
+ * a separate buffer for the address was allocated, 'tofree' is set to
+ * the allocated buffer, otherwise 'tofree' is set to NULL.
+ */
+void	nvmf_parse_address(const char *in_address, const char **address,
+    const char **port, char **tofree);
+
+uint16_t nvmf_parse_cntlid(const char *cntlid);
+
+/* Returns true if able to open a connection. */
+bool	tcp_qpair_params(struct nvmf_qpair_params *params, int adrfam,
+    const char *address, const char *port);
+
+/* Connect to a discovery controller and return the Admin qpair. 
*/
+struct nvmf_qpair *connect_discovery_adminq(enum nvmf_trtype trtype,
+    const char *address, const char *port, const char *hostnqn);
+
+/*
+ * Connect to an NVM controller establishing an Admin qpair and one or
+ * more I/O qpairs.  The controller's controller data is returned in
+ * *cdata on success.  Returns a non-zero value from <sysexits.h> on
+ * failure.
+ */
+int	connect_nvm_queues(const struct nvmf_association_params *aparams,
+    enum nvmf_trtype trtype, int adrfam, const char *address,
+    const char *port, uint16_t cntlid, const char *subnqn, const char *hostnqn,
+    uint32_t kato, struct nvmf_qpair **admin, struct nvmf_qpair **io,
+    u_int num_io_queues, u_int queue_size, struct nvme_controller_data *cdata);
+
+#endif /* !__FABRICS_H__ */
diff --git a/sbin/nvmecontrol/fabrics.c b/sbin/nvmecontrol/fabrics.c
new file mode 100644
--- /dev/null
+++ b/sbin/nvmecontrol/fabrics.c
@@ -0,0 +1,528 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023-2024 Chelsio Communications, Inc.
+ * Written by: John Baldwin
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "fabrics.h"
+
+/*
+ * Subroutines shared by several Fabrics commands.
+ */
+static char nqn[NVMF_NQN_MAX_LEN];
+static uint8_t hostid[16];
+static bool hostid_initted = false;
+
+static bool
+init_hostid(void)
+{
+	int error;
+
+	if (hostid_initted)
+		return (true);
+
+	error = nvmf_hostid_from_hostuuid(hostid);
+	if (error != 0) {
+		warnc(error, "Failed to generate hostid");
+		return (false);
+	}
+	error = nvmf_nqn_from_hostuuid(nqn);
+	if (error != 0) {
+		warnc(error, "Failed to generate host NQN");
+		return (false);
+	}
+
+	hostid_initted = true;
+	return (true);
+}
+
+void
+nvmf_parse_address(const char *in_address, const char **address,
+    const char **port, char **tofree)
+{
+	char *cp;
+
+	/*
+	 * Accepts the following address formats:
+	 *
+	 * [IPv6 address]:port
+	 * IPv4 address:port
+	 * hostname:port
+	 * [IPv6 address]
+	 * IPv6 address
+	 * IPv4 address
+	 * hostname
+	 */
+	if (in_address[0] == '[') {
+		/* IPv6 address in square brackets. */
+		cp = strchr(in_address + 1, ']');
+		if (cp == NULL || cp == in_address + 1)
+			errx(EX_USAGE, "Invalid address %s", in_address);
+		*tofree = strndup(in_address + 1, cp - (in_address + 1));
+		*address = *tofree;
+
+		/* Skip over ']' */
+		cp++;
+		switch (*cp) {
+		case '\0':
+			*port = NULL;
+			return;
+		case ':':
+			if (cp[1] != '\0') {
+				*port = cp + 1;
+				return;
+			}
+			/* FALLTHROUGH */
+		default:
+			errx(EX_USAGE, "Invalid address %s", in_address);
+		}
+	}
+
+	/* Look for the first colon. */
+	cp = strchr(in_address, ':');
+	if (cp == NULL) {
+		*address = in_address;
+		*port = NULL;
+		*tofree = NULL;
+		return;
+	}
+
+	/* If there is another colon, assume this is an IPv6 address. */
+	if (strchr(cp + 1, ':') != NULL) {
+		*address = in_address;
+		*port = NULL;
+		*tofree = NULL;
+		return;
+	}
+
+	/* Both strings on either side of the colon must be non-empty. */
+	if (cp == in_address || cp[1] == '\0')
+		errx(EX_USAGE, "Invalid address %s", in_address);
+
+	*tofree = strndup(in_address, cp - in_address);
+	*address = *tofree;
+
+	/* Skip over ':' */
+	*port = cp + 1;
+}
+
+/*
+ * Parse a controller ID argument: "dynamic", "static", or a number no
+ * larger than NVMF_CNTLID_STATIC_MAX.  Anything else, including a
+ * number followed by trailing characters, exits with EX_USAGE.
+ */
+uint16_t
+nvmf_parse_cntlid(const char *cntlid)
+{
+	u_long value;
+	char *end;
+
+	if (strcasecmp(cntlid, "dynamic") == 0)
+		return (NVMF_CNTLID_DYNAMIC);
+	else if (strcasecmp(cntlid, "static") == 0)
+		return (NVMF_CNTLID_STATIC_ANY);
+	else {
+		value = strtoul(cntlid, &end, 0);
+
+		if (end == cntlid || *end != '\0' ||
+		    value > NVMF_CNTLID_STATIC_MAX)
+			errx(EX_USAGE, "Invalid controller ID");
+
+		return (value);
+	}
+}
+
+bool
+tcp_qpair_params(struct nvmf_qpair_params *params, int adrfam,
+    const char *address, const char *port)
+{
+	struct addrinfo hints, *ai, *list;
+	int error, s;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = adrfam;
+	hints.ai_protocol = IPPROTO_TCP;
+	error = getaddrinfo(address, port, &hints, &list);
+	if (error != 0) {
+		warnx("%s", gai_strerror(error));
+		return (false);
+	}
+
+	for (ai = list; ai != NULL; ai = ai->ai_next) {
+		s = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+		if (s == -1)
+			continue;
+
+		if (connect(s, ai->ai_addr, ai->ai_addrlen) != 0) {
+			close(s);
+			continue;
+		}
+
+		params->tcp.fd = s;
+		freeaddrinfo(list);
+		return (true);
+	}
+	warn("Failed to connect to controller at %s:%s", address, port);
+	freeaddrinfo(list);
+	return (false);
+}
+
+static void
+tcp_discovery_association_params(struct nvmf_association_params *params)
+{
+	params->tcp.pda = 0;
+	params->tcp.header_digests = false;
+	params->tcp.data_digests = false;
+	params->tcp.maxr2t = 1;
+}
+
+struct nvmf_qpair *
+connect_discovery_adminq(enum nvmf_trtype trtype, const char *address,
+    const char *port, const char *hostnqn)
+{
+	struct nvmf_association_params aparams;
+	struct nvmf_qpair_params qparams;
+	struct nvmf_association *na;
+	struct nvmf_qpair *qp;
+	uint64_t cap, cc, csts;
+	int error, timo;
+
+	memset(&aparams, 0, sizeof(aparams));
+	aparams.sq_flow_control = false;
+	switch (trtype) {
+	case NVMF_TRTYPE_TCP:
+		/* 7.4.9.3 Default port for discovery */
+		if (port == NULL)
+			port = "8009";
+		tcp_discovery_association_params(&aparams);
+		break;
+	default:
+		errx(EX_UNAVAILABLE, "Unsupported transport %s",
+		    nvmf_transport_type(trtype));
+	}
+
+	if (!init_hostid())
+		exit(EX_IOERR);
+	if (hostnqn != NULL) {
+		if (!nvmf_nqn_valid(hostnqn))
+			errx(EX_USAGE, "Invalid HostNQN %s", hostnqn);
+	} else
+		hostnqn = nqn;
+
+	na = nvmf_allocate_association(trtype, false, &aparams);
+	if (na == NULL)
+		err(EX_IOERR, "Failed to create discovery association");
+	memset(&qparams, 0, sizeof(qparams));
+	qparams.admin = true;
+	if (!tcp_qpair_params(&qparams, AF_UNSPEC, address, port))
+		exit(EX_NOHOST);
+	qp = nvmf_connect(na, &qparams, 0, NVME_MIN_ADMIN_ENTRIES, hostid,
+	    NVMF_CNTLID_DYNAMIC, NVMF_DISCOVERY_NQN, hostnqn, 0);
+	if (qp == NULL)
+		errx(EX_IOERR, "Failed to connect to discovery controller: %s",
+		    nvmf_association_error(na));
+	nvmf_free_association(na);
+
+	/* Fetch Controller Capabilities Property */
+	error = nvmf_read_property(qp, NVMF_PROP_CAP, 8, &cap);
+	if (error != 0)
+		errc(EX_IOERR, error, "Failed to fetch CAP");
+
+	/* Set Controller Configuration Property (CC.EN=1) */
+	error = nvmf_read_property(qp, NVMF_PROP_CC, 4, &cc);
+	if (error != 0)
+		errc(EX_IOERR, error, "Failed to fetch CC");
+
+	/* Clear known fields preserving any reserved fields. */
+	cc &= ~(NVMEM(NVME_CC_REG_SHN) | NVMEM(NVME_CC_REG_AMS) |
+	    NVMEM(NVME_CC_REG_MPS) | NVMEM(NVME_CC_REG_CSS));
+
+	/* Leave AMS, MPS, and CSS as 0. */
+
+	cc |= NVMEF(NVME_CC_REG_EN, 1);
+
+	error = nvmf_write_property(qp, NVMF_PROP_CC, 4, cc);
+	if (error != 0)
+		errc(EX_IOERR, error, "Failed to set CC");
+
+	/* Wait for CSTS.RDY in Controller Status */
+	timo = NVME_CAP_LO_TO(cap);
+	for (;;) {
+		error = nvmf_read_property(qp, NVMF_PROP_CSTS, 4, &csts);
+		if (error != 0)
+			errc(EX_IOERR, error, "Failed to fetch CSTS");
+
+		if (NVMEV(NVME_CSTS_REG_RDY, csts) != 0)
+			break;
+
+		if (timo == 0)
+			errx(EX_IOERR, "Controller failed to become ready");
+		timo--;
+		usleep(500 * 1000);
+	}
+
+	return (qp);
+}
+
+/*
+ * XXX: Should this accept the admin queue size as a parameter rather
+ * than always using NVMF_MIN_ADMIN_MAX_SQ_SIZE?
+ */
+static int
+connect_nvm_adminq(struct nvmf_association *na,
+    const struct nvmf_qpair_params *params, struct nvmf_qpair **qpp,
+    uint16_t cntlid, const char *subnqn, const char *hostnqn, uint32_t kato,
+    uint16_t *mqes)
+{
+	struct nvmf_qpair *qp;
+	uint64_t cap, cc, csts;
+	u_int mps, mpsmin, mpsmax;
+	int error, timo;
+
+	qp = nvmf_connect(na, params, 0, NVMF_MIN_ADMIN_MAX_SQ_SIZE, hostid,
+	    cntlid, subnqn, hostnqn, kato);
+	if (qp == NULL) {
+		warnx("Failed to connect to NVM controller %s: %s", subnqn,
+		    nvmf_association_error(na));
+		return (EX_IOERR);
+	}
+
+	/* Fetch Controller Capabilities Property */
+	error = nvmf_read_property(qp, NVMF_PROP_CAP, 8, &cap);
+	if (error != 0) {
+		warnc(error, "Failed to fetch CAP");
+		nvmf_free_qpair(qp);
+		return (EX_IOERR);
+	}
+
+	/* Require the NVM command set. */
+	if (NVME_CAP_HI_CSS_NVM(cap >> 32) == 0) {
+		warnx("Controller %s does not support the NVM command set",
+		    subnqn);
+		nvmf_free_qpair(qp);
+		return (EX_UNAVAILABLE);
+	}
+
+	*mqes = NVME_CAP_LO_MQES(cap);
+
+	/* Prefer native host page size if it fits. */
+	mpsmin = NVMEV(NVME_CAP_HI_REG_MPSMIN, cap >> 32);
+	mpsmax = NVMEV(NVME_CAP_HI_REG_MPSMAX, cap >> 32);
+	mps = ffs(getpagesize()) - 1;
+	if (mps < mpsmin + NVME_MPS_SHIFT)
+		mps = mpsmin;
+	else if (mps > mpsmax + NVME_MPS_SHIFT)
+		mps = mpsmax;
+	else
+		mps -= NVME_MPS_SHIFT;
+
+	/* Configure controller. */
+	error = nvmf_read_property(qp, NVMF_PROP_CC, 4, &cc);
+	if (error != 0) {
+		warnc(error, "Failed to fetch CC");
+		nvmf_free_qpair(qp);
+		return (EX_IOERR);
+	}
+
+	/* Clear known fields preserving any reserved fields. */
+	cc &= ~(NVMEM(NVME_CC_REG_IOCQES) | NVMEM(NVME_CC_REG_IOSQES) |
+	    NVMEM(NVME_CC_REG_SHN) | NVMEM(NVME_CC_REG_AMS) |
+	    NVMEM(NVME_CC_REG_MPS) | NVMEM(NVME_CC_REG_CSS));
+
+	cc |= NVMEF(NVME_CC_REG_IOCQES, 4); /* CQE entry size == 16 */
+	cc |= NVMEF(NVME_CC_REG_IOSQES, 6); /* SQE entry size == 64 */
+	cc |= NVMEF(NVME_CC_REG_AMS, 0); /* AMS 0 (Round-robin) */
+	cc |= NVMEF(NVME_CC_REG_MPS, mps);
+	cc |= NVMEF(NVME_CC_REG_CSS, 0); /* NVM command set */
+	cc |= NVMEF(NVME_CC_REG_EN, 1); /* EN = 1 */
+
+	error = nvmf_write_property(qp, NVMF_PROP_CC, 4, cc);
+	if (error != 0) {
+		warnc(error, "Failed to set CC");
+		nvmf_free_qpair(qp);
+		return (EX_IOERR);
+	}
+
+	/* Wait for CSTS.RDY in Controller Status */
+	timo = NVME_CAP_LO_TO(cap);
+	for (;;) {
+		error = nvmf_read_property(qp, NVMF_PROP_CSTS, 4, &csts);
+		if (error != 0) {
+			warnc(error, "Failed to fetch CSTS");
+			nvmf_free_qpair(qp);
+			return (EX_IOERR);
+		}
+
+		if (NVMEV(NVME_CSTS_REG_RDY, csts) != 0)
+			break;
+
+		if (timo == 0) {
+			warnx("Controller failed to become ready");
+			nvmf_free_qpair(qp);
+			return (EX_IOERR);
+		}
+		timo--;
+		usleep(500 * 1000);
+	}
+
+	*qpp = qp;
+	return (0);
+}
+
+static void
+shutdown_controller(struct nvmf_qpair *qp)
+{
+	uint64_t cc;
+	int error;
+
+	error = nvmf_read_property(qp, NVMF_PROP_CC, 4, &cc);
+	if (error != 0) {
+		warnc(error, "Failed to fetch CC");
+		goto out;
+	}
+
+	cc |= NVMEF(NVME_CC_REG_SHN, NVME_SHN_NORMAL);
+
+	error = nvmf_write_property(qp, NVMF_PROP_CC, 4, cc);
+	if (error != 0) {
+		warnc(error, "Failed to set CC to trigger shutdown");
+		goto out;
+	}
+
+out:
+	nvmf_free_qpair(qp);
+}
+
+/* Returns a value from <sysexits.h> */
+int
+connect_nvm_queues(const struct nvmf_association_params *aparams,
+    enum nvmf_trtype trtype, int adrfam, const char *address,
+    const char *port, uint16_t cntlid, const char *subnqn, const char *hostnqn,
+    uint32_t kato, struct nvmf_qpair **admin, struct nvmf_qpair **io,
+    u_int num_io_queues, u_int queue_size, struct nvme_controller_data *cdata)
+{
+	struct nvmf_qpair_params qparams;
+	struct nvmf_association *na;
+	u_int queues;
+	int error;
+	uint16_t mqes;
+
+	switch (trtype) {
+	case NVMF_TRTYPE_TCP:
+		break;
+	default:
+		warnx("Unsupported transport %s", nvmf_transport_type(trtype));
+		return (EX_UNAVAILABLE);
+	}
+
+	if (!init_hostid())
+		return (EX_IOERR);
+	if (hostnqn != NULL) {
+		if (!nvmf_nqn_valid(hostnqn)) {
+			warnx("Invalid HostNQN %s", hostnqn);
+			return (EX_USAGE);
+		}
+	} else
+		hostnqn = nqn;
+
+	/* Association. */
+	na = nvmf_allocate_association(trtype, false, aparams);
+	if (na == NULL) {
+		warn("Failed to create association for %s", subnqn);
+		return (EX_IOERR);
+	}
+
+	/* Admin queue. */
+	memset(&qparams, 0, sizeof(qparams));
+	qparams.admin = true;
+	if (!tcp_qpair_params(&qparams, adrfam, address, port)) {
+		nvmf_free_association(na);
+		return (EX_NOHOST);
+	}
+	error = connect_nvm_adminq(na, &qparams, admin, cntlid, subnqn, hostnqn,
+	    kato, &mqes);
+	if (error != 0) {
+		nvmf_free_association(na);
+		return (error);
+	}
+
+	/* Validate I/O queue size. */
+	if (queue_size == 0)
+		queue_size = mqes + 1;
+	else if (queue_size > mqes + 1) {
+		shutdown_controller(*admin);
+		nvmf_free_association(na);
+		warnx("I/O queue size exceeds controller maximum (%u)",
+		    mqes + 1);
+		return (EX_USAGE);
+	}
+
+	/* Fetch controller data. */
+	error = nvmf_host_identify_controller(*admin, cdata);
+	if (error != 0) {
+		shutdown_controller(*admin);
+		nvmf_free_association(na);
+		warnc(error, "Failed to fetch controller data for %s", subnqn);
+		return (EX_IOERR);
+	}
+
+	nvmf_update_assocation(na, cdata);
+
+	error = nvmf_host_request_queues(*admin, num_io_queues, &queues);
+	if (error != 0) {
+		shutdown_controller(*admin);
+		nvmf_free_association(na);
+		warnc(error, "Failed to request I/O queues");
+		return (EX_IOERR);
+	}
+	if (queues < num_io_queues) {
+		shutdown_controller(*admin);
+		nvmf_free_association(na);
+		warnx("Controller enabled fewer I/O queues (%u) than requested (%u)",
+		    queues, num_io_queues);
+		return (EX_PROTOCOL);
+	}
+
+	/* I/O queues. */
+	memset(io, 0, sizeof(*io) * num_io_queues);
+	for (u_int i = 0; i < num_io_queues; i++) {
+		memset(&qparams, 0, sizeof(qparams));
+		qparams.admin = false;
+		if (!tcp_qpair_params(&qparams, adrfam, address, port)) {
+			error = EX_NOHOST;
+			goto out;
+		}
+		io[i] = nvmf_connect(na, &qparams, i + 1, queue_size, hostid,
+		    nvmf_cntlid(*admin), subnqn, hostnqn, 0);
+		if (io[i] == NULL) {
+			warnx("Failed to create I/O queue: %s",
+			    nvmf_association_error(na));
+			error = EX_IOERR;
+			goto out;
+		}
+	}
+	nvmf_free_association(na);
+	return (0);
+
+out:
+	for (u_int i = 0; i < num_io_queues; i++) {
+		if (io[i] == NULL)
+			break;
+		nvmf_free_qpair(io[i]);
+	}
+	shutdown_controller(*admin);
+	nvmf_free_association(na);
+	return (error);
+}
diff --git a/sbin/nvmecontrol/nvmecontrol.8 b/sbin/nvmecontrol/nvmecontrol.8
--- a/sbin/nvmecontrol/nvmecontrol.8
+++ b/sbin/nvmecontrol/nvmecontrol.8
@@ -205,9 +205,48 @@
 .Ic io-passthru
 .Op args
 .Aq Ar namespace-id
+.Nm
+.Ic discover
+.Op Fl v
+.Op Fl t Ar transport
+.Op Fl q Ar HostNQN
+.Nm
+.Ic connect
+.Op Fl FGg
+.Op Fl c Ar cntl-id
+.Op Fl i Ar queues
+.Op Fl k Ar seconds
+.Op Fl t Ar transport
+.Op Fl q Ar HostNQN
+.Op Fl Q Ar entries
+.Aq Ar address
+.Aq Ar SubNQN
+.Nm
+.Ic connect-all
+.Op Fl FGg
+.Op Fl i Ar queues
+.Op Fl k Ar seconds
+.Op Fl t Ar transport
+.Op Fl q Ar HostNQN
+.Op Fl Q Ar entries
+.Aq Ar address
+.Nm
+.Ic disconnect
+.Aq Ar device-id | Ar namespace-id | Ar SubNQN
+.Nm
+.Ic reconnect
+.Op Fl FGg
+.Op Fl i Ar queues
+.Op Fl k Ar seconds
+.Op Fl t Ar transport
+.Op Fl q Ar HostNQN
+.Op Fl Q Ar entries
+.Aq Ar device-id
+.Aq Ar address
 .Sh DESCRIPTION
-NVM Express (NVMe) is a storage protocol standard, for SSDs and other
-high-speed storage devices over PCI Express.
+NVM Express (NVMe) is a storage protocol standard for SSDs and other
+high-speed storage devices over PCI Express as well as remote storage
+devices accessed via a network fabric.
 .Ss devlist
 List all NVMe controllers and namespaces along with their device nodes.
 With the
@@ -676,6 +715,97 @@
 Commands needing metadata are not supported by the
 .Xr nvme 4
 drive.
+.Ss discover
+List the remote controllers advertised by a remote Discovery Controller:
+.Bl -tag -width 6n
+.It Fl t Ar transport
+Transport to use.
+The default is
+.Ar tcp .
+.It Fl q Ar HostNQN
+NVMe Qualified Name to use for this host.
+By default an NQN is auto-generated from the current host's UUID.
+.It Fl v
+Display the
+.Dv IDENTIFY_CONTROLLER
+data for the Discovery Controller.
+.El
+.Ss connect
+Establish an association with the I/O controller named
+.Ar SubNQN
+at
+.Ar address .
+The address must include a port.
+.Pp
+An admin queue pair and one or more I/O queue pairs are created and handed
+off to the kernel to create a new controller device.
+.Bl -tag -width 6n
+.It Fl c Ar cntl-id
+Remote controller ID to request:
+.Bl -tag
+.It dynamic
+Request a dynamic controller ID for controllers using the dynamic
+controller model.
+This is the default.
+.It static
+Request any available controller ID for controllers using the static
+controller model.
+.It Ar number
+Request a specific controller ID for controllers using the static
+controller model.
+.El
+.It Fl F
+Request submission queue flow control.
+By default submission queue flow control is disabled unless the remote +controller requires it. +.It Fl g +Enable TCP PDU header digests. +.It Fl G +Enable TCP PDU data digests. +.It Fl i Ar queues +Number of I/O queue pairs to create. +The default is 1. +.It Fl k Ar seconds +Keep Alive timer duration in seconds. +The default is 120. +.It Fl t Ar transport +Transport to use. +The default is +.Ar tcp . +.It Fl q Ar HostNQN +NVMe Qualified Name to use for this host. +By default an NQN is auto-generated from the current host's UUID. +.It Fl Q Ar entries +Number of entries in each I/O queue. +By default the maximum queue size reported by the MQES field +of the remote controller's CAP property is used. +.El +.Ss connect-all +Query the Discovery Controller at +.Ar address +and establish an association for each advertised I/O controller. +The +.Fl t +flag determines the transport used for the initial association with +the Discovery Controller and defaults to +.Ar tcp . +All other flags are used to control properties of each I/O association as +described above for the +.Cm connect +command. +.Ss disconnect +Delete the controller device associated with a remote I/O controller +including any active association and open queues. +.Ss reconnect +Reestablish an association for the remote I/O controller associated with +.Ar device-id +at +.Ar address . +The address must include a port. +The flags have the same meaning for the new association as described above +for the +.Cm connect +command. .Sh DEVICE NAMES Where .Aq Ar namespace-id @@ -705,6 +835,37 @@ of .Dq 0 means query the drive itself.
+.Sh FABRICS TRANSPORTS +The following NVM Express over Fabrics transports are supported for +accessing remote controllers: +.Bl -tag +.It tcp +TCP transport +.El +.Sh NETWORK ADDRESSES +Network addresses for remote controllers can use one of the following formats: +.Bl -bullet +.It +.Bq Ar IPv6 address +.Ns : Ns Ar port +.It +.Ar IPv4 address +.Ns : Ns Ar port +.It +.Ar hostname Ns : Ns Ar port +.It +.Bq Ar IPv6 address +.It +.Ar IPv6 address +.It +.Ar IPv4 address +.It +.Ar hostname +.El +.Pp +If a +.Ar port +is not provided, a default value is used if possible. .Sh EXAMPLES .Dl nvmecontrol devlist .Pp diff --git a/sbin/nvmecontrol/reconnect.c b/sbin/nvmecontrol/reconnect.c new file mode 100644 --- /dev/null +++ b/sbin/nvmecontrol/reconnect.c @@ -0,0 +1,167 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2024 Chelsio Communications, Inc. + * Written by: John Baldwin + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "nvmecontrol.h" +#include "fabrics.h" + +/* + * See comment about other possible settings in connect.c. 
+ */ + +static struct options { + const char *dev; + const char *transport; + const char *address; + const char *hostnqn; + uint32_t kato; + uint16_t num_io_queues; + uint16_t queue_size; + bool data_digests; + bool flow_control; + bool header_digests; +} opt = { + .dev = NULL, + .transport = "tcp", + .address = NULL, + .hostnqn = NULL, + .kato = NVMF_KATO_DEFAULT / 1000, + .num_io_queues = 1, + .queue_size = 0, + .data_digests = false, + .flow_control = false, + .header_digests = false, +}; + +static void +tcp_association_params(struct nvmf_association_params *params) +{ + params->tcp.pda = 0; + params->tcp.header_digests = opt.header_digests; + params->tcp.data_digests = opt.data_digests; + /* XXX */ + params->tcp.maxr2t = 1; +} + +static int +reconnect_nvm_controller(int fd, enum nvmf_trtype trtype, int adrfam, + const char *address, const char *port) +{ + struct nvme_controller_data cdata; + struct nvmf_association_params aparams; + struct nvmf_reconnect_params rparams; + struct nvmf_qpair *admin, **io; + int error; + + error = nvmf_reconnect_params(fd, &rparams); + if (error != 0) { + warnc(error, "Failed to fetch reconnect parameters"); + return (EX_IOERR); + } + + memset(&aparams, 0, sizeof(aparams)); + aparams.sq_flow_control = opt.flow_control; + switch (trtype) { + case NVMF_TRTYPE_TCP: + tcp_association_params(&aparams); + break; + default: + warnx("Unsupported transport %s", nvmf_transport_type(trtype)); + return (EX_UNAVAILABLE); + } + + io = calloc(opt.num_io_queues, sizeof(*io)); + error = connect_nvm_queues(&aparams, trtype, adrfam, address, port, + rparams.cntlid, rparams.subnqn, opt.hostnqn, opt.kato, &admin, io, + opt.num_io_queues, opt.queue_size, &cdata); + if (error != 0) + return (error); + + error = nvmf_reconnect_host(fd, admin, opt.num_io_queues, io, &cdata); + if (error != 0) { + warnc(error, "Failed to handoff queues to kernel"); + return (EX_IOERR); + } + free(io); + return (0); +} + +static void +reconnect_fn(const struct cmd *f, int 
argc, char *argv[]) +{ + enum nvmf_trtype trtype; + const char *address, *port; + char *tofree; + int error, fd; + + if (arg_parse(argc, argv, f)) + return; + + if (strcasecmp(opt.transport, "tcp") == 0) { + trtype = NVMF_TRTYPE_TCP; + } else + errx(EX_USAGE, "Unsupported or invalid transport"); + + nvmf_parse_address(opt.address, &address, &port, &tofree); + + open_dev(opt.dev, &fd, 1, 1); + if (port == NULL) + errx(EX_USAGE, "Explicit port required"); + + error = reconnect_nvm_controller(fd, trtype, AF_UNSPEC, address, port); + if (error != 0) + exit(error); + + close(fd); + free(tofree); +} + +static const struct opts reconnect_opts[] = { +#define OPT(l, s, t, opt, addr, desc) { l, s, t, &opt.addr, desc } + OPT("transport", 't', arg_string, opt, transport, + "Transport type"), + OPT("nr-io-queues", 'i', arg_uint16, opt, num_io_queues, + "Number of I/O queues"), + OPT("queue-size", 'Q', arg_uint16, opt, queue_size, + "Number of entries in each I/O queue"), + OPT("keep-alive-tmo", 'k', arg_uint32, opt, kato, + "Keep Alive timeout (in seconds)"), + OPT("hostnqn", 'q', arg_string, opt, hostnqn, + "Host NQN"), + OPT("flow_control", 'F', arg_none, opt, flow_control, + "Request SQ flow control"), + OPT("hdr_digests", 'g', arg_none, opt, header_digests, + "Enable TCP PDU header digests"), + OPT("data_digests", 'G', arg_none, opt, data_digests, + "Enable TCP PDU data digests"), + { NULL, 0, arg_none, NULL, NULL } +}; +#undef OPT + +static const struct args reconnect_args[] = { + { arg_string, &opt.dev, "controller-id" }, + { arg_string, &opt.address, "address" }, + { arg_none, NULL, NULL }, +}; + +static struct cmd reconnect_cmd = { + .name = "reconnect", + .fn = reconnect_fn, + .descr = "Reconnect to a fabrics controller", + .ctx_size = sizeof(opt), + .opts = reconnect_opts, + .args = reconnect_args, +}; + +CMD_COMMAND(reconnect_cmd);