Index: sbin/nvmecontrol/Makefile =================================================================== --- sbin/nvmecontrol/Makefile +++ sbin/nvmecontrol/Makefile @@ -3,7 +3,7 @@ PACKAGE=runtime PROG= nvmecontrol SRCS= nvmecontrol.c devlist.c firmware.c identify.c logpage.c \ - perftest.c reset.c nvme_util.c power.c wdc.c + perftest.c reset.c nvme_util.c power.c wdc.c ns.c utils.c MAN= nvmecontrol.8 .PATH: ${SRCTOP}/sys/dev/nvme Index: sbin/nvmecontrol/identify.c =================================================================== --- sbin/nvmecontrol/identify.c +++ sbin/nvmecontrol/identify.c @@ -44,6 +44,7 @@ print_controller(struct nvme_controller_data *cdata) { uint8_t str[128]; + char cbuf[UINT128_DIG + 1]; printf("Controller Capabilities/Features\n"); printf("================================\n"); @@ -65,6 +66,7 @@ printf("Unlimited\n"); else printf("%d\n", PAGE_SIZE * (1 << cdata->mdts)); + printf("Controller ID: 0x%02x\n", cdata->ctrlr_id); printf("\n"); printf("Admin Command Set Attributes\n"); @@ -75,6 +77,9 @@ cdata->oacs.format ? "Supported" : "Not Supported"); printf("Firmware Activate/Download: %s\n", cdata->oacs.firmware ? "Supported" : "Not Supported"); + printf("Namespace Managment: %s\n", + cdata->oacs.nsmgmt ? "Supported" : "Not Supported"); + printf("Abort Command Limit: %d\n", cdata->acl+1); printf("Async Event Request Limit: %d\n", cdata->aerl+1); printf("Number of Firmware Slots: "); @@ -110,6 +115,24 @@ cdata->oncs.dsm ? "Supported" : "Not Supported"); printf("Volatile Write Cache: %s\n", cdata->vwc.present ? 
"Present" : "Not Present"); + + if (cdata->oacs.nsmgmt) { + uint128_t total, unalloc, inuse; + unalloc = to128(cdata->untncap.unvmcap); + total = to128(cdata->untncap.tnvmcap); + inuse = (total - unalloc); + + printf("\n"); + printf("Namespace Drive Attributes\n"); + printf("==========================\n"); + printf("NVM total cap: %s\n", + uint128_to_str(total, cbuf, sizeof(cbuf))); + printf("NVM unallocated cap: %s\n", + uint128_to_str(unalloc, cbuf, sizeof(cbuf))); + printf("NVM in use ctrlr %d %s\n", cdata->ctrlr_id, + uint128_to_str(inuse, cbuf, sizeof(cbuf))); + printf("\n"); + } } static void Index: sbin/nvmecontrol/logpage.c =================================================================== --- sbin/nvmecontrol/logpage.c +++ sbin/nvmecontrol/logpage.c @@ -87,46 +87,7 @@ * billion IOPs for billions of seconds to overflow them. * So, on 32-bit i386, you'll get truncated values. */ -#define UINT128_DIG 39 -#ifdef __i386__ -typedef uint64_t uint128_t; -#else -typedef __uint128_t uint128_t; -#endif - -static inline uint128_t -to128(void *p) -{ - return *(uint128_t *)p; -} - -static char * -uint128_to_str(uint128_t u, char *buf, size_t buflen) -{ - char *end = buf + buflen - 1; - - *end-- = '\0'; - if (u == 0) - *end-- = '0'; - while (u && end >= buf) { - *end-- = u % 10 + '0'; - u /= 10; - } - end++; - if (u != 0) - return NULL; - - return end; -} -/* "Missing" from endian.h */ -static __inline uint64_t -le48dec(const void *pp) -{ - uint8_t const *p = (uint8_t const *)pp; - - return (((uint64_t)le16dec(p + 4) << 32) | le32dec(p)); -} static void * get_log_buffer(uint32_t size) Index: sbin/nvmecontrol/ns.c =================================================================== --- /dev/null +++ sbin/nvmecontrol/ns.c @@ -0,0 +1,314 @@ +/*- + * Copyright (c) 2017 Netflix, Inc + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "nvmecontrol.h" + +/* handles NVME_OPC_NAMESPACE_MANAGEMENT and ATTACHMENT admin cmds */ + +static void +nscreate_usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, NSCREATE_USAGE); + exit(1); +} +static void +nsdelete_usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, NSDELETE_USAGE); + exit(1); +} +static void +nsattach_usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, NSATTACH_USAGE); + exit(1); +} +static void +nsdetach_usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, NSDETACH_USAGE); + exit(1); +} + +/* + * NS MGMT Command specific status values: + * 0xa = Invalid Format + * 0x15 = Namespace Insufficient Capacity + * 0x16 = Namespace ID unavailable (number namespaces exceeded) + * 0xb = Thin Provisioning Not supported + */ +void +nscreate(int argc, char *argv[]) +{ + struct nvme_pt_command pt; + struct nvme_namespace_data nsdata; + + uint64_t nsiz = 0, cap = 0; /* 0 means the option was not given */ + int ch, fd, format = -1; + + if (optind >= argc) + nscreate_usage(); + + while ((ch = getopt(argc, argv, "s:c:f:")) != -1) { + switch (ch) { + case 's': + nsiz = atol(optarg); + break; + case 'c': + cap = atol(optarg); + break; + case 'f': + format = atol(optarg); + break; + default: + nscreate_usage(); + } + } + + if (optind >= argc || nsiz == 0 || cap == 0) /* -s and -c are required per NSCREATE_USAGE */ + nscreate_usage(); + + memset(&nsdata, 0, sizeof(nsdata)); /* never hand uninitialized stack bytes to the controller */ + nsdata.nsze = nsiz; + nsdata.ncap = cap; + /* honor -f when given; otherwise keep the previous hard-coded default of format 2 */ + nsdata.flbas.format = (format == -1) ? 0x2 : format; + + memset(&pt, 0, sizeof(pt)); + pt.cmd.opc = NVME_OPC_NAMESPACE_MANAGEMENT; + + pt.cmd.cdw10 = 0; /* create */ + pt.buf = &nsdata; + pt.len = sizeof(struct nvme_namespace_data); + pt.is_read = 0; /* passthrough writes data to ctrlr */ + open_dev(argv[optind], &fd, 1, 1); + if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) + err(1, "nscreate request failed"); /* err() so errno is reported, as nsattach does */ + + if (nvme_completion_is_error(&pt.cpl)) { + errx(1, "nscreate request returned error 0x%x", + pt.cpl.status.sc); +
} else { + fprintf(stderr,"%s-I-new nsid = %d\n", __FUNCTION__, + pt.cpl.cdw0); + } + exit(0); +} + +void +nsdelete(int argc, char *argv[]) +{ + struct nvme_pt_command pt; + int ch, fd; + int nsid; + char answer[10]; + char buf[2]; + + if (optind >= argc) + nsdelete_usage(); + + while ((ch = getopt(argc, argv, "n:")) != -1) { + switch ((char)ch) { + case 'n': + nsid = atoi(optarg); + break; + default: + nsdelete_usage(); + } + } + if (optind >= argc) + nsdelete_usage(); + + + if (nsid == -1) { /* delete all hurts */ + memset(answer,0,10); + printf("using -1 as nsid will take out ALL namespaces, are you sure? [yes|no] \n"); + fgets(answer, 10, stdin); + if (strncmp(answer,"yes",3) != 0) { + printf("nsdelete cancelled.\n"); + exit(0); + } + printf("ok, removing all, watch your head\n"); + } + + memset(&pt, 0, sizeof(pt)); + pt.cmd.opc = NVME_OPC_NAMESPACE_MANAGEMENT; + pt.cmd.cdw10 = 1; /* delete */ + pt.buf = buf; + pt.len = sizeof(buf); + pt.is_read = 1; + + pt.cmd.nsid = nsid; + + open_dev(argv[optind], &fd, 1, 1); + if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) + errx(1, "remove namespace request to %s failed", argv[optind]); + + if (nvme_completion_is_error(&pt.cpl)) { + errx(1, "nsdelete request returned error: 0x%x\n", + pt.cpl.status.sc); + + } else { + printf("namespace %d deleted\n", + nsid); + } + + exit(0); +} + + +/* + * Attach and Detach use Dword 10, and a controller list (section 4.9) + * This struct is 4096 bytes in size. + * 0h = attach + * 1h = detach + * Completion 18h = Already attached + * 19h = NS is private and already attached to a controller + * 1Ah = Not attached, request could not be completed + * 1Ch = Controller list invalid. 
+ */ +void +nsattach(int argc, char *argv[]) +{ + struct nvme_pt_command pt; + struct ctrlr_list clist; + uint32_t nsid; + uint16_t ctrlrid; + + int fd, ch, result; + + if (optind >= argc) + nsattach_usage(); + + while ((ch = getopt(argc, argv, "n:c:")) != -1) { + switch (ch) { + case 'n': + nsid = atol(optarg); + break; + case 'c': + ctrlrid = atol(optarg); + break; + default: + nsattach_usage(); + } + } + + if (optind >= argc) + nsattach_usage(); + + + clist.ctrlr_cnt = 1; + clist.ctrlrs[0] = ctrlrid; + + memset(&pt, 0, sizeof(pt)); + pt.cmd.opc = NVME_OPC_NAMESPACE_ATTACHMENT; + pt.cmd.cdw10 = 0; /* attach */ + pt.cmd.nsid = nsid; + pt.buf = &clist; // we don't support shared ns currently + pt.len = sizeof(clist); + + open_dev(argv[optind], &fd, 1, 1); + if ( (result = ioctl(fd, NVME_PASSTHROUGH_CMD, &pt)) < 0) + err(1, "attach request to %s failed", argv[optind]); + + if (nvme_completion_is_error(&pt.cpl)) { + errx(1, "nsattach returned error 0x%x\n", + pt.cpl.status.sc); + } else { + fprintf(stderr,"nsid %d attached to %s\n", + nsid, argv[optind]); + } + exit(0); +} + + +void +nsdetach(int argc, char *argv[]) +{ + struct nvme_pt_command pt; + struct ctrlr_list clist; + uint16_t ctrlrid; + int fd, ch, nsid; + + if (optind >= argc) + nsdetach_usage(); + + while ((ch = getopt(argc, argv, "n:c:")) != -1) { + switch (ch) { + case 'n': + nsid = atol(optarg); + break; + case 'c': + ctrlrid = atol(optarg); + break; + default: + nsdetach_usage(); + } + } + + if (optind >= argc) + nsdetach_usage(); + + clist.ctrlr_cnt = 1; + clist.ctrlrs[0] = ctrlrid; /* shared not supported */ + + memset(&pt, 0, sizeof(pt)); + pt.cmd.opc = NVME_OPC_NAMESPACE_ATTACHMENT; + pt.cmd.cdw10 = 1; /* detach */ + pt.cmd.nsid = nsid; + pt.buf = &clist; + pt.len = sizeof(clist); + + open_dev(argv[optind], &fd, 1, 1); + if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) + err(1, "detach request to %s failed", argv[optind]); + + if (nvme_completion_is_error(&pt.cpl)) { + errx(1, "nsdetach returned error 
0x%x\n", + pt.cpl.status.sc); + } else { + fprintf(stderr,"nsid %d detached from %s\n", + nsid, argv[optind]); + } + exit(0); +} Index: sbin/nvmecontrol/nvmecontrol.h =================================================================== --- sbin/nvmecontrol/nvmecontrol.h +++ sbin/nvmecontrol/nvmecontrol.h @@ -68,6 +68,17 @@ #define WDC_USAGE \ " nvmecontrol wdc (cap-diag|drive-log|get-crash-dump|purge|purge-montior)\n" +#define NSCREATE_USAGE \ +" nvmecontrol nscreate -s nssize -c capacity [-f format] nvmeN\n" + +#define NSDELETE_USAGE \ +" nvmecontrol nsdelete -n namespace id nvmeN\n" + +#define NSATTACH_USAGE \ +" nvmecontrol nsattach -n namespace id -c controller id nvmeN \n" + +#define NSDETACH_USAGE \ +" nvmecontrol nsdetach -n namespace id -c controller id nvmeN\n" void devlist(int argc, char *argv[]); void identify(int argc, char *argv[]); @@ -77,6 +88,10 @@ void firmware(int argc, char *argv[]); void power(int argc, char *argv[]); void wdc(int argc, char *argv[]); +void nscreate(int argc, char *argv[]); +void nsdelete(int argc, char *argv[]); +void nsattach(int argc, char *argv[]); +void nsdetach(int argc, char *argv[]); int open_dev(const char *str, int *fd, int show_error, int exit_on_error); void parse_ns_str(const char *ns_str, char *ctrlr_str, int *nsid); @@ -88,5 +103,22 @@ void gen_usage(struct nvme_function *); void dispatch(int argc, char *argv[], struct nvme_function *f); +/* + * 128-bit integer augments to standard values. On i386 this + * doesn't exist, so we use 64-bit values. The 128-bit counters + * are crazy anyway, since for this purpose, you'd need a + * billion IOPs for billions of seconds to overflow them. + * So, on 32-bit i386, you'll get truncated values. 
+ */ +#define UINT128_DIG 39 +#ifdef __i386__ +typedef uint64_t uint128_t; +#else +typedef __uint128_t uint128_t; +#endif +uint128_t to128(void *p); +char *uint128_to_str(uint128_t u, char *buf, size_t buflen); +uint64_t le48dec(const void *pp); + #endif Index: sbin/nvmecontrol/nvmecontrol.c =================================================================== --- sbin/nvmecontrol/nvmecontrol.c +++ sbin/nvmecontrol/nvmecontrol.c @@ -55,6 +55,10 @@ {"firmware", firmware, FIRMWARE_USAGE}, {"power", power, POWER_USAGE}, {"wdc", wdc, WDC_USAGE}, + {"nscreate", nscreate, NSCREATE_USAGE}, + {"nsdelete", nsdelete, NSDELETE_USAGE}, + {"nsattach", nsattach, NSATTACH_USAGE}, + {"nsdetach", nsdetach, NSDETACH_USAGE}, {NULL, NULL, NULL}, }; Index: sbin/nvmecontrol/utils.c =================================================================== --- /dev/null +++ sbin/nvmecontrol/utils.c @@ -0,0 +1,67 @@ +/*- + * Copyright (c) 2017 Netflix, Inc + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); +#include +#include + +#include "nvmecontrol.h" + +inline uint128_t +to128(void *p) +{ + return *(uint128_t *)p; +} + +char * +uint128_to_str(uint128_t u, char *buf, size_t buflen) +{ + char *end = buf + buflen - 1; + + *end-- = '\0'; + if (u == 0) + *end-- = '0'; + while (u && end >= buf) { + *end-- = u % 10 + '0'; + u /= 10; + } + end++; + if (u != 0) + return NULL; + + return end; +} + +/* "Missing" from endian.h */ +uint64_t +le48dec(const void *pp) +{ + uint8_t const *p = (uint8_t const *)pp; + + return (((uint64_t)le16dec(p + 4) << 32) | le32dec(p)); +} + Index: sys/conf/files.amd64 =================================================================== --- sys/conf/files.amd64 +++ sys/conf/files.amd64 @@ -341,7 +341,7 @@ dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme -dev/nvme/nvme_sim.c optional nvme scbus !nvd +dev/nvme/nvme_sim.c optional nvme scbus dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme Index: sys/dev/nvd/nvd.c =================================================================== --- sys/dev/nvd/nvd.c +++ sys/dev/nvd/nvd.c @@ -35,11 +35,13 @@ #include #include #include +#include #include #include #include +#include /* xx */ #define NVD_STR "nvd" @@ -56,6 +58,7 @@ static void *nvd_new_controller(struct nvme_controller *ctrlr); static void nvd_controller_fail(void *ctrlr); +static void 
*nvd_remove_disk(int nsid, void *ctrlr_arg); static int nvd_load(void); static void nvd_unload(void); @@ -139,7 +142,8 @@ TAILQ_INIT(&disk_head); consumer_handle = nvme_register_consumer(nvd_new_disk, - nvd_new_controller, NULL, nvd_controller_fail); + nvd_new_controller, NULL, nvd_controller_fail, + nvd_remove_disk); return (consumer_handle != NULL ? 0 : -1); } @@ -298,7 +302,9 @@ TAILQ_INIT(&nvd_ctrlr->disk_head); TAILQ_INSERT_TAIL(&ctrlr_head, nvd_ctrlr, tailq); - + printf("%s-I-map nvmec[%p] to nvdc[%p]\n", __FUNCTION__, + ctrlr, nvd_ctrlr); + return (nvd_ctrlr); } @@ -309,7 +315,7 @@ struct nvd_disk *ndisk; struct disk *disk; struct nvd_controller *ctrlr = ctrlr_arg; - + ndisk = malloc(sizeof(struct nvd_disk), M_NVD, M_ZERO | M_WAITOK); disk = disk_alloc(); @@ -418,6 +424,33 @@ mtx_destroy(&ndisk->bioqlock); } +/* + * namespace disks are removed at detach time + * from the periph's perspective. + */ +void * +nvd_remove_disk(int nsid, void *ctrlr_arg) { + + struct nvd_controller *ctrlr = ctrlr_arg; + struct nvd_disk *disk, *tdisk; + + if (TAILQ_EMPTY(&ctrlr->disk_head)) { + printf("nvd_remove_disk ctrlr_arg %p has no disks!\n", + ctrlr); + return (NULL); + } + + TAILQ_FOREACH_SAFE(disk, (&ctrlr->disk_head), ctrlr_tailq, tdisk ) { + if (disk->ns->id == nsid) { + TAILQ_REMOVE(&disk_head, disk, global_tailq); + TAILQ_REMOVE(&ctrlr->disk_head, disk, ctrlr_tailq); + destroy_geom_disk(disk); + free(disk, M_NVD); + return (NULL); + } + } + return (NULL); +} static void nvd_controller_fail(void *ctrlr_arg) Index: sys/dev/nvme/nvme.h =================================================================== --- sys/dev/nvme/nvme.h +++ sys/dev/nvme/nvme.h @@ -341,10 +341,11 @@ NVME_OPC_GET_FEATURES = 0x0a, /* 0x0b - reserved */ NVME_OPC_ASYNC_EVENT_REQUEST = 0x0c, - /* 0x0d-0x0f - reserved */ + NVME_OPC_NAMESPACE_MANAGEMENT = 0x0d, + /* 0x0e-0x0f - reserved */ NVME_OPC_FIRMWARE_ACTIVATE = 0x10, NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD = 0x11, - + NVME_OPC_NAMESPACE_ATTACHMENT = 0x15,
NVME_OPC_FORMAT_NVM = 0x80, NVME_OPC_SECURITY_SEND = 0x81, NVME_OPC_SECURITY_RECEIVE = 0x82, @@ -456,7 +457,9 @@ /** maximum data transfer size */ uint8_t mdts; - uint8_t reserved1[178]; + uint16_t ctrlr_id; + + uint8_t reserved[176]; /* bytes 256-511: admin command set attributes */ @@ -470,8 +473,9 @@ /* supports firmware activate/download commands */ uint16_t firmware : 1; - - uint16_t oacs_rsvd : 13; + /* supports namespace management commands */ + uint16_t nsmgmt : 1; + uint16_t oacs_rsvd : 12; } __packed oacs; /** abort command limit */ @@ -513,8 +517,15 @@ uint8_t avscc_rsvd : 7; } __packed avscc; - uint8_t reserved2[247]; + uint8_t reserved2[15]; + struct { + /* if nsmgmt, report tnvmcap and unvmcap */ + uint8_t tnvmcap[16]; + uint8_t unvmcap[16]; + } __packed untncap; + + uint8_t reserved3[200]; /* bytes 512-703: nvm command set attributes */ /** submission queue entry size */ @@ -529,7 +540,7 @@ uint8_t max : 4; } __packed cqes; - uint8_t reserved3[2]; + uint8_t reserved4[2]; /** number of namespaces */ uint32_t nn; @@ -555,10 +566,10 @@ } __packed vwc; /* TODO: flesh out remaining nvm command set attributes */ - uint8_t reserved4[178]; + uint8_t reserved5[178]; /* bytes 704-2047: i/o command set attributes */ - uint8_t reserved5[1344]; + uint8_t reserved6[1344]; /* bytes 2048-3071: power state descriptors */ struct nvme_power_state power_state[32]; @@ -847,6 +858,13 @@ * by the caller. 
*/ struct mtx * driver_lock; + struct nvme_controller *ctrlr; +}; + +/* attachment support */ +struct ctrlr_list { + uint16_t ctrlr_cnt; + uint16_t ctrlrs[1]; /* no shared ns for now xxx */ }; #define nvme_completion_is_error(cpl) \ @@ -869,6 +887,7 @@ typedef void (*nvme_cons_async_fn_t)(void *, const struct nvme_completion *, uint32_t, void *, uint32_t); typedef void (*nvme_cons_fail_fn_t)(void *); +typedef void *(*nvme_cons_rm_ns_fn_t)(int nsid, void *); enum nvme_namespace_flags { NVME_NS_DEALLOCATE_SUPPORTED = 0x1, @@ -917,7 +936,8 @@ struct nvme_consumer * nvme_register_consumer(nvme_cons_ns_fn_t ns_fn, nvme_cons_ctrlr_fn_t ctrlr_fn, nvme_cons_async_fn_t async_fn, - nvme_cons_fail_fn_t fail_fn); + nvme_cons_fail_fn_t fail_fn, + nvme_cons_rm_ns_fn_t rm_ns_fn); void nvme_unregister_consumer(struct nvme_consumer *consumer); /* Controller helper functions */ Index: sys/dev/nvme/nvme.c =================================================================== --- sys/dev/nvme/nvme.c +++ sys/dev/nvme/nvme.c @@ -45,6 +45,7 @@ nvme_cons_ctrlr_fn_t ctrlr_fn; nvme_cons_async_fn_t async_fn; nvme_cons_fail_fn_t fail_fn; + nvme_cons_rm_ns_fn_t rm_ns_fn; }; struct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS]; @@ -290,13 +291,49 @@ return (0); } +/* + * Tell every registered consumer that namespace ns_idx was attached + * (op != 0: call its ns_fn) or detached (op == 0: call its rm_ns_fn). + * Namespaces whose identify data reports nsze == 0 are skipped. + */ +void +nvme_notify_namespace(struct nvme_controller *ctrlr, int ns_idx, int op) +{ + struct nvme_namespace *ns; + struct nvme_consumer *cons; + void *ctrlr_cookie = NULL; + int i; + + for (i = 0; i < NVME_MAX_CONSUMERS; i++) { + + if (nvme_consumer[i].id != INVALID_CONSUMER_ID) { + ns = &ctrlr->ns[ns_idx]; + cons = &nvme_consumer[i]; + + if (ns->data.nsze == 0) { + continue; + } + ctrlr_cookie = ctrlr->cons_cookie[i]; + + if (op) { /* attach: never fall through to the remove callback */ + if (cons->ns_fn != NULL) + ns->cons_cookie[cons->id] = + (*cons->ns_fn)(ns, ctrlr_cookie); + } else if (cons->rm_ns_fn != NULL) { /* detach: the callback is optional */ + ns->cons_cookie[cons->id] = + (*cons->rm_ns_fn)(ns_idx, ctrlr_cookie); + } + } + } +} +
static void nvme_notify(struct nvme_consumer *cons, struct nvme_controller *ctrlr) { struct nvme_namespace *ns; void *ctrlr_cookie; - int cmpset, ns_idx; + int ns_idx; /* * The consumer may register itself after the nvme devices @@ -308,16 +345,18 @@ if (!ctrlr->is_initialized) return; - cmpset = atomic_cmpset_32(&ctrlr->notification_sent, 0, 1); - - if (cmpset == 0) - return; + if (ctrlr->cons_cookie[cons->id] == NULL) { + if (cons->ctrlr_fn != NULL) { + ctrlr_cookie = (*cons->ctrlr_fn)(ctrlr); + } else { + ctrlr_cookie = NULL; + } + } else { + ctrlr_cookie = ctrlr->cons_cookie[cons->id]; + } - if (cons->ctrlr_fn != NULL) - ctrlr_cookie = (*cons->ctrlr_fn)(ctrlr); - else - ctrlr_cookie = NULL; ctrlr->cons_cookie[cons->id] = ctrlr_cookie; + if (ctrlr->is_failed) { if (cons->fail_fn != NULL) (*cons->fail_fn)(ctrlr_cookie); @@ -325,15 +364,17 @@ * Do not notify consumers about the namespaces of a * failed controller. */ + nvme_printf(ctrlr,"nvme_notify: notify not sent, is_failed is true\n"); return; } for (ns_idx = 0; ns_idx < min(ctrlr->cdata.nn, NVME_MAX_NAMESPACES); ns_idx++) { ns = &ctrlr->ns[ns_idx]; if (ns->data.nsze == 0) continue; - if (cons->ns_fn != NULL) - ns->cons_cookie[cons->id] = - (*cons->ns_fn)(ns, ctrlr_cookie); + if (cons->ns_fn != NULL) { + ns->cons_cookie[cons->id] = /* ctrl->cons_cookie vs ns->cons_cookie ? 
*/ + (*cons->ns_fn)(ns, ctrlr_cookie); + } } } @@ -408,8 +449,8 @@ struct nvme_consumer * nvme_register_consumer(nvme_cons_ns_fn_t ns_fn, nvme_cons_ctrlr_fn_t ctrlr_fn, - nvme_cons_async_fn_t async_fn, - nvme_cons_fail_fn_t fail_fn) + nvme_cons_async_fn_t async_fn, + nvme_cons_fail_fn_t fail_fn, nvme_cons_rm_ns_fn_t rm_ns_fn) { int i; @@ -424,7 +465,7 @@ nvme_consumer[i].ctrlr_fn = ctrlr_fn; nvme_consumer[i].async_fn = async_fn; nvme_consumer[i].fail_fn = fail_fn; - + nvme_consumer[i].rm_ns_fn = rm_ns_fn; nvme_notify_new_consumer(&nvme_consumer[i]); return (&nvme_consumer[i]); } Index: sys/dev/nvme/nvme_ctrlr.c =================================================================== --- sys/dev/nvme/nvme_ctrlr.c +++ sys/dev/nvme/nvme_ctrlr.c @@ -418,7 +418,10 @@ */ ctrlr->num_io_queues = min(ctrlr->num_io_queues, sq_allocated); ctrlr->num_io_queues = min(ctrlr->num_io_queues, cq_allocated); - + + nvme_printf(ctrlr,"num_qpairs: ctrlr->num_io_queues = %d\n", + ctrlr->num_io_queues); + return (0); } @@ -850,6 +853,101 @@ return (0); } +struct ns_create_arg { + uint16_t nsid; + struct nvme_controller *ctrlr; +}; + +static void +nvme_ns_create(void *arg, int pending) +{ + struct nvme_controller *ctrlr; + struct nvme_sim_softc *sc; + struct ns_create_arg *nsarg = (struct ns_create_arg *)arg; + int nsid = nsarg->nsid; + + ctrlr = nsarg->ctrlr; + sc = ctrlr->sim_softc; + nvme_ns_construct(&ctrlr->ns[nsid], nsid, ctrlr); + nvme_sim_new_ns(&ctrlr->ns[nsid], (void*)sc); + nvme_notify_namespace(ctrlr, nsid, 1); /* new disk */ + free(arg, M_NVME); /* task argument is owned by the task handler */ +} + +static void nvme_ns_detach(void *arg, int pending) +{ + struct ns_create_arg *nsarg = arg; + struct nvme_controller *ctrlr = nsarg->ctrlr; + int nsid = nsarg->nsid; + + nvme_notify_namespace(ctrlr, nsid, 0); /* remove disk */ + free(arg, M_NVME); /* allocated per request by nvme_ns_attachment_done; was leaked here */ +} + +static void +nvme_ns_mgmt_done(void *arg, const struct nvme_completion *cpl) +{ + struct nvme_pt_command *pt = (struct nvme_pt_command *)arg; + struct nvme_controller *ctrlr = pt->ctrlr; + +
bzero(&pt->cpl, sizeof(pt->cpl)); + pt->cpl.cdw0 = cpl->cdw0; + pt->cpl.status = cpl->status; + pt->cpl.status.p = 0; + + if (pt->cmd.cdw10 == 1 && cpl->status.sc == 0 && pt->cmd.nsid < NVME_MAX_NAMESPACES) { /* good delete; the bound keeps nsid 0xffffffff (delete all) from indexing past ns[] -- assumes ns[] holds NVME_MAX_NAMESPACES entries, confirm */ + memset(&ctrlr->ns[pt->cmd.nsid], 0, sizeof(struct nvme_namespace)); + } + + mtx_lock(pt->driver_lock); + wakeup(pt); + mtx_unlock(pt->driver_lock); +} + +/* + * Completion handler for namespace attach (cdw10 == 0) / detach (cdw10 == 1): copies the completion back to the caller and, on success, queues the matching namespace task. + */ +static void +nvme_ns_attachment_done(void *arg, const struct nvme_completion *cpl) +{ + struct nvme_pt_command *pt = arg; + struct ns_create_arg *ns_arg = NULL; + struct nvme_controller *ctrlr = pt->ctrlr; + + bzero(&pt->cpl, sizeof(pt->cpl)); + pt->cpl.cdw0 = cpl->cdw0; + pt->cpl.status = cpl->status; + pt->cpl.status.p = 0; + + if (cpl->status.sc == 0 && pt->cmd.cdw10 <= 1 && /* only attach/detach get a task */ + pt->cmd.nsid < NVME_MAX_NAMESPACES) /* the task handlers index ns[] with nsid */ + ns_arg = malloc(sizeof(*ns_arg), M_NVME, M_ZERO | M_NOWAIT); + if (ns_arg != NULL) { /* setup task entry; M_NOWAIT may return NULL */ + ns_arg->nsid = pt->cmd.nsid; + ns_arg->ctrlr = ctrlr; + memset(&ctrlr->ns_task, 0, sizeof (struct task)); + + ctrlr->ns_task.ta_context = ns_arg; + ctrlr->ns_task.ta_pending = 0; + + if (pt->cmd.cdw10 == 0) { /* good attach */ + ctrlr->ns_task.ta_func = nvme_ns_create; /* create + attach = usable create xxx */ + } else { /* good detach */ + ctrlr->ns_task.ta_func = nvme_ns_detach; + } + taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->ns_task); + + } else { + nvme_printf(pt->ctrlr, "%s-E-unhandled opc: 0x%x cdw10 value %d completed\n", + __FUNCTION__, pt->cmd.opc, pt->cmd.cdw10); + } + + mtx_lock(pt->driver_lock); + wakeup(pt); + mtx_unlock(pt->driver_lock); + +} + + static void nvme_pt_done(void *arg, const struct nvme_completion *cpl) { @@ -912,12 +1010,27 @@ ret = EFAULT; goto err; } - req = nvme_allocate_request_vaddr(buf->b_data, pt->len, - nvme_pt_done, pt); - } else - req = nvme_allocate_request_vaddr(pt->buf, pt->len, - nvme_pt_done, pt); - } else + + if (is_admin_cmd && pt->cmd.opc == NVME_OPC_NAMESPACE_MANAGEMENT) { + pt->ctrlr = ctrlr; + req = nvme_allocate_request_vaddr(buf->b_data, pt->len, + nvme_ns_mgmt_done, pt); + } else if
(is_admin_cmd && pt->cmd.opc == NVME_OPC_NAMESPACE_ATTACHMENT) { + pt->ctrlr = ctrlr; + req = nvme_allocate_request_vaddr(buf->b_data, pt->len, + nvme_ns_attachment_done, pt); + + } else { + /* + * Every other opcode, including a non-admin 0x15, is plain passthrough: pt->ctrlr is not set on this path, so the ns completion handlers must not run. + */ + req = nvme_allocate_request_vaddr(buf->b_data, pt->len, + nvme_pt_done, pt); + } + } else /* NOTE(review): buf looks to be set up only on the user-buffer path; the pre-patch code passed pt->buf here -- confirm */ + req = nvme_allocate_request_vaddr(pt->buf, pt->len, + nvme_pt_done, pt); + } else req = nvme_allocate_request_null(nvme_pt_done, pt); req->cmd.opc = pt->cmd.opc; Index: sys/dev/nvme/nvme_ns.c =================================================================== --- sys/dev/nvme/nvme_ns.c +++ sys/dev/nvme/nvme_ns.c @@ -508,6 +508,7 @@ DELAY(5); if (nvme_completion_is_error(&status.cpl)) { nvme_printf(ctrlr, "nvme_identify_namespace failed\n"); + ns->id = 0; return (ENXIO); } @@ -517,9 +518,10 @@ * standard says the entire id will be zeros, so this is a * cheap way to test for that. */ - if (ns->data.nsze == 0) + if (ns->data.nsze == 0) { + ns->id = 0; return (ENXIO); - + } /* * Note: format is a 0-based value, so > is appropriate here, * not >=.
Index: sys/dev/nvme/nvme_private.h =================================================================== --- sys/dev/nvme/nvme_private.h +++ sys/dev/nvme/nvme_private.h @@ -319,6 +319,9 @@ uint32_t notification_sent; boolean_t is_failed; + void *sim_softc; + struct task ns_task; + STAILQ_HEAD(, nvme_request) fail_req; }; @@ -527,4 +530,7 @@ void nvme_ctrlr_intx_handler(void *arg); +void *nvme_sim_new_ns(struct nvme_namespace *ns, void *sc_arg); +void nvme_notify_namespace(struct nvme_controller *ctrlr, int nsid, int op); + #endif /* __NVME_PRIVATE_H__ */ Index: sys/dev/nvme/nvme_sim.c =================================================================== --- sys/dev/nvme/nvme_sim.c +++ sys/dev/nvme/nvme_sim.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include // Yes, this is wrong. #include @@ -169,7 +170,7 @@ cpi->hba_eng_cnt = 0; cpi->max_target = 0; cpi->max_lun = ctrlr->cdata.nn; - cpi->maxio = nvme_ns_get_max_io_xfer_size(ns); + cpi->maxio = ctrlr->max_xfer_size; cpi->initiator_id = 0; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 4000000; /* 4 GB/s 4 lanes pcie 3 */ @@ -181,7 +182,14 @@ cpi->transport_version = 1; /* XXX Get PCIe spec ? 
*/ cpi->protocol = PROTO_NVME; cpi->protocol_version = NVME_REV_1; /* Groks all 1.x NVMe cards */ - cpi->xport_specific.nvme.nsid = ns->id; + /* it's only the first base namespace that suffers from + * a path inq before it's created + */ + if (ns) { + cpi->xport_specific.nvme.nsid = ns->id; + } else { + cpi->xport_specific.nvme.nsid = 1; + } cpi->ccb_h.status = CAM_REQ_CMP; break; } @@ -248,12 +256,16 @@ int max_trans; int unit; struct nvme_sim_softc *sc = NULL; + int err; + + max_trans = 256;/* XXX not so simple -- must match queues */ + // max_trans = ctrlr->num_io_queues; unit = device_get_unit(ctrlr->dev); devq = cam_simq_alloc(max_trans); if (devq == NULL) - return NULL; + return (NULL); sc = malloc(sizeof(*sc), M_NVME, M_ZERO | M_WAITOK); @@ -265,8 +277,16 @@ printf("Failed to allocate a sim\n"); cam_simq_free(devq); free(sc, M_NVME); - return NULL; + return (NULL); } + err = xpt_bus_register(sc->s_sim, ctrlr->dev, 0); + if (err != CAM_SUCCESS) { + nvme_printf(ctrlr,"%s-E-failed xpt_bus_register : %d\n", + __FUNCTION__, err); + } + + ctrlr->sim_softc = sc; /* ok, now it's clear, don't need this since we + * save this in the consumers 'cookie' */ return sc; } @@ -291,30 +311,14 @@ xpt_rescan(ccb); } -static void * +void * nvme_sim_new_ns(struct nvme_namespace *ns, void *sc_arg) { struct nvme_sim_softc *sc = sc_arg; struct nvme_controller *ctrlr = sc->s_ctrlr; - int i; sc->s_ns = ns; - - /* - * XXX this is creating one bus per ns, but it should be one - * XXX target per controller, and one LUN per namespace. - * XXX Current drives only support one NS, so there's time - * XXX to fix it later when new drives arrive. - * - * XXX I'm pretty sure the xpt_bus_register() call below is - * XXX like super lame and it really belongs in the sim_new_ctrlr - * XXX callback. Then the create_path below would be pretty close - * XXX to being right. Except we should be per-ns not per-ctrlr - * XXX data. 
- */ - mtx_lock(&ctrlr->lock); -/* Create bus */ /* * XXX do I need to lock ctrlr->lock ? @@ -324,36 +328,22 @@ * time, and nothing is in parallel. */ - i = 0; - if (xpt_bus_register(sc->s_sim, ctrlr->dev, 0) != CAM_SUCCESS) - goto error; - i++; if (xpt_create_path(&sc->s_path, /*periph*/NULL, cam_sim_path(sc->s_sim), - 1, ns->id) != CAM_REQ_CMP) - goto error; - i++; + 1, ns->id) != CAM_REQ_CMP) { + mtx_unlock(&ctrlr->lock); + cam_sim_free(sc->s_sim, /*free_devq*/TRUE); + return (NULL); + } sc->s_path->device->nvme_data = nvme_ns_get_data(ns); sc->s_path->device->nvme_cdata = nvme_ctrlr_get_data(ns->ctrlr); /* Scan bus */ - nvme_sim_rescan_target(ctrlr, sc->s_path); + nvme_sim_rescan_target(ctrlr, sc->s_path); mtx_unlock(&ctrlr->lock); - return ns; - -error: - switch (i) { - case 2: - xpt_free_path(sc->s_path); - case 1: - xpt_bus_deregister(cam_sim_path(sc->s_sim)); - case 0: - cam_sim_free(sc->s_sim, /*free_devq*/TRUE); - } - mtx_unlock(&ctrlr->lock); - return NULL; + return (ns); } static void @@ -361,6 +351,15 @@ { /* XXX cleanup XXX */ } +static void * +nvme_sim_rm_namespace(int nsid, void *ctrlr_arg) +{ + struct nvme_sim_softc *sc = ctrlr_arg; + + xpt_async(AC_LOST_DEVICE, sc->s_path, NULL); + xpt_free_path(sc->s_path); + return (NULL); +} struct nvme_consumer *consumer_cookie; @@ -369,7 +368,10 @@ { consumer_cookie = nvme_register_consumer(nvme_sim_new_ns, - nvme_sim_new_controller, NULL, nvme_sim_controller_fail); + nvme_sim_new_controller, + NULL, + nvme_sim_controller_fail, + nvme_sim_rm_namespace); } SYSINIT(nvme_sim_register, SI_SUB_DRIVERS, SI_ORDER_ANY,