Index: sys/cam/nvme/nvme_da.c
===================================================================
--- sys/cam/nvme/nvme_da.c
+++ sys/cam/nvme/nvme_da.c
@@ -640,17 +640,52 @@
 	cam_periph_lock(periph);
 }
 
+static void
+ndasetgeom(struct nda_softc *softc, struct cam_periph *periph)
+{
+	struct disk *disk = softc->disk;
+	struct ccb_pathinq cpi;
+	const struct nvme_namespace_data *nsd;
+	const struct nvme_controller_data *cd;
+	uint8_t flbas_fmt, lbads, vwc_present;
+
+	nsd = nvme_get_identify_ns(periph);
+	cd = nvme_get_identify_cntrl(periph);
+	xpt_path_inq(&cpi, periph->path);	/* fill cpi before reading hba_misc below */
+
+	flbas_fmt = (nsd->flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) &
+	    NVME_NS_DATA_FLBAS_FORMAT_MASK;
+	lbads = (nsd->lbaf[flbas_fmt] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) &
+	    NVME_NS_DATA_LBAF_LBADS_MASK;
+	disk->d_sectorsize = 1 << lbads;
+	disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze);
+	disk->d_delmaxsize = disk->d_mediasize;
+	disk->d_flags = DISKFLAG_DIRECT_COMPLETION;
+	if (nvme_ctrlr_has_dataset_mgmt(cd))
+		disk->d_flags |= DISKFLAG_CANDELETE;
+	vwc_present = (cd->vwc >> NVME_CTRLR_DATA_VWC_PRESENT_SHIFT) &
+	    NVME_CTRLR_DATA_VWC_PRESENT_MASK;
+	if (vwc_present)
+		disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
+	if ((cpi.hba_misc & PIM_UNMAPPED) != 0) {
+		disk->d_flags |= DISKFLAG_UNMAPPED_BIO;
+		softc->unmappedio = 1;
+	}
+}
+
 static void
 ndaasync(void *callback_arg, u_int32_t code,
 	struct cam_path *path, void *arg)
 {
 	struct cam_periph *periph;
+	struct nda_softc *softc;
+	struct ccb_getdev *cgd;
+	int error;
 
 	periph = (struct cam_periph *)callback_arg;
 	switch (code) {
 	case AC_FOUND_DEVICE:
 	{
-		struct ccb_getdev *cgd;
 		cam_status status;
 
 		cgd = (struct ccb_getdev *)arg;
@@ -677,14 +711,27 @@
 		    "due to status 0x%x\n", status);
 		break;
 	}
+	case AC_GETDEV_CHANGED:
+	{
+		softc = (struct nda_softc *)periph->softc;
+
+		/*
+		 * Update our information based on the new Identify data.
+		 */
+		ndasetgeom(softc, periph);
+		error = disk_resize(softc->disk, M_NOWAIT);
+		if (error != 0) {
+			xpt_print(periph->path, "disk_resize(9) failed, error = %d\n", error);
+			break;
+		}
+		break;
+	}
 	case AC_ADVINFO_CHANGED:
 	{
 		uintptr_t buftype;
 
 		buftype = (uintptr_t)arg;
 		if (buftype == CDAI_TYPE_PHYS_PATH) {
-			struct nda_softc *softc;
-
 			softc = periph->softc;
 			disk_attr_changed(softc->disk, "GEOM::physpath",
 			    M_NOWAIT);
@@ -843,7 +890,6 @@
 	const struct nvme_namespace_data *nsd;
 	const struct nvme_controller_data *cd;
 	char announce_buf[80];
-	uint8_t flbas_fmt, lbads, vwc_present;
 	u_int maxio;
 	int quirks;
 
@@ -908,24 +954,8 @@
 	else if (maxio > maxphys)
 		maxio = maxphys;	/* for safety */
 	disk->d_maxsize = maxio;
-	flbas_fmt = (nsd->flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) &
-	    NVME_NS_DATA_FLBAS_FORMAT_MASK;
-	lbads = (nsd->lbaf[flbas_fmt] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) &
-	    NVME_NS_DATA_LBAF_LBADS_MASK;
-	disk->d_sectorsize = 1 << lbads;
-	disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze);
-	disk->d_delmaxsize = disk->d_mediasize;
-	disk->d_flags = DISKFLAG_DIRECT_COMPLETION;
-	if (nvme_ctrlr_has_dataset_mgmt(cd))
-		disk->d_flags |= DISKFLAG_CANDELETE;
-	vwc_present = (cd->vwc >> NVME_CTRLR_DATA_VWC_PRESENT_SHIFT) &
-	    NVME_CTRLR_DATA_VWC_PRESENT_MASK;
-	if (vwc_present)
-		disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
-	if ((cpi.hba_misc & PIM_UNMAPPED) != 0) {
-		disk->d_flags |= DISKFLAG_UNMAPPED_BIO;
-		softc->unmappedio = 1;
-	}
+
+	ndasetgeom(softc, periph);
 	/*
 	 * d_ident and d_descr are both far bigger than the length of either
	 * the serial or model number strings.
@@ -991,7 +1021,7 @@
 	 * Register for device going away and info about the drive
 	 * changing (though with NVMe, it can't)
 	 */
-	xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED,
+	xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED | AC_GETDEV_CHANGED,
 	    ndaasync, periph, periph->path);
 
 	softc->state = NDA_STATE_NORMAL;
Index: sys/dev/nvd/nvd.c
===================================================================
--- sys/dev/nvd/nvd.c
+++ sys/dev/nvd/nvd.c
@@ -62,6 +62,7 @@
 static void nvd_done(void *arg, const struct nvme_completion *cpl);
 static void nvd_gone(struct nvd_disk *ndisk);
 
+static void *nvd_ns_change(struct nvme_namespace *ns, void *ctrlr);
 static void *nvd_new_disk(struct nvme_namespace *ns, void *ctrlr);
 
 static void *nvd_new_controller(struct nvme_controller *ctrlr);
@@ -156,7 +157,7 @@
 	TAILQ_INIT(&ctrlr_head);
 	TAILQ_INIT(&disk_head);
 
-	consumer_handle = nvme_register_consumer(nvd_new_disk,
+	consumer_handle = nvme_register_consumer(nvd_ns_change,
 	    nvd_new_controller, NULL, nvd_controller_fail);
 
 	return (consumer_handle != NULL ? 0 : -1);
@@ -413,6 +414,49 @@
 	return (nvd_ctrlr);
 }
 
+static void
+nvd_resize(struct nvd_disk *ndisk)
+{
+	struct disk *disk = ndisk->disk;
+	struct nvme_namespace *ns = ndisk->ns;
+
+	disk->d_sectorsize = nvme_ns_get_sector_size(ns);
+	disk->d_mediasize = (off_t)nvme_ns_get_size(ns);
+	disk->d_maxsize = nvme_ns_get_max_io_xfer_size(ns);
+	disk->d_delmaxsize = (off_t)nvme_ns_get_size(ns);
+	if (disk->d_delmaxsize > nvd_delete_max)
+		disk->d_delmaxsize = nvd_delete_max;
+
+	disk_resize(disk, M_NOWAIT);
+
+	printf(NVD_STR"%u: NVMe namespace resized\n", ndisk->unit);
+	printf(NVD_STR"%u: %juMB (%ju %u byte sectors)\n", disk->d_unit,
+	    (uintmax_t)disk->d_mediasize / (1024*1024),
+	    (uintmax_t)disk->d_mediasize / disk->d_sectorsize,
+	    disk->d_sectorsize);
+}
+
+static void *
+nvd_ns_change(struct nvme_namespace *ns, void *ctrlr_arg)
+{
+	struct nvd_disk *ndisk;
+	struct nvd_controller *ctrlr = ctrlr_arg;
+
+	if (ns->flags & NVME_NS_FLAG_CHANGED) {
+		mtx_lock(&nvd_lock);
+		TAILQ_FOREACH(ndisk, &ctrlr->disk_head, ctrlr_tailq) {
+			if (ndisk->ns->id == ns->id) {
+				nvd_resize(ndisk);
+			}
+		}
+		mtx_unlock(&nvd_lock);
+		return (ctrlr_arg);
+	}
+
+	nvd_new_disk(ns, ctrlr_arg);
+	return (ctrlr_arg);
+}
+
 static void *
 nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg)
 {
Index: sys/dev/nvme/nvme_ctrlr.c
===================================================================
--- sys/dev/nvme/nvme_ctrlr.c
+++ sys/dev/nvme/nvme_ctrlr.c
@@ -260,6 +260,36 @@
 	mtx_unlock(&ctrlr->lock);
 }
 
+static void
+nvme_ctrlr_post_update_ns_request(struct nvme_controller *ctrlr,
+    struct nvme_namespace *req)
+{
+
+	mtx_lock(&ctrlr->lock);
+	STAILQ_INSERT_TAIL(&ctrlr->update_ns_req, req, stailq);
+	mtx_unlock(&ctrlr->lock);
+	if (!ctrlr->is_dying)
+		taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->update_ns_task);
+}
+
+static void
+nvme_ctrlr_update_ns_req_task(void *arg, int pending)
+{
+	struct nvme_controller *ctrlr = arg;
+	struct nvme_namespace *req;
+
+	mtx_lock(&ctrlr->lock);
+	while ((req = STAILQ_FIRST(&ctrlr->update_ns_req)) != NULL) {
+		STAILQ_REMOVE_HEAD(&ctrlr->update_ns_req, stailq);
+		mtx_unlock(&ctrlr->lock);
+		nvme_ns_construct(req, req->id, ctrlr, NVME_REASON_FLAGGED);
+		nvme_notify_ns(ctrlr, req->id);
+		req->flags &= ~NVME_NS_FLAG_CHANGED;
+		mtx_lock(&ctrlr->lock);
+	}
+	mtx_unlock(&ctrlr->lock);
+}
+
 /*
  * Wait for RDY to change.
 *
@@ -586,7 +616,7 @@
 
 	for (i = 0; i < min(ctrlr->cdata.nn, NVME_MAX_NAMESPACES); i++) {
 		ns = &ctrlr->ns[i];
-		nvme_ns_construct(ns, i+1, ctrlr);
+		nvme_ns_construct(ns, i+1, ctrlr, NVME_REASON_RESET);
 	}
 
 	return (0);
@@ -750,13 +780,13 @@
 			    ~health_info->critical_warning;
 			nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
 			    aer->ctrlr->async_event_config, NULL, NULL);
-		} else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE &&
-		    !nvme_use_nvd) {
+		} else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE) {
 			nsl = (struct nvme_ns_list *)aer->log_page_buffer;
 			for (i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
 				if (nsl->ns[i] > NVME_MAX_NAMESPACES)
 					break;
-				nvme_notify_ns(aer->ctrlr, nsl->ns[i]);
+				nvme_ctrlr_post_update_ns_request(aer->ctrlr,
+				    &aer->ctrlr->ns[nsl->ns[i]-1]);
 			}
 		}
 
@@ -1467,14 +1497,16 @@
 	 */
 	ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
 	    taskqueue_thread_enqueue, &ctrlr->taskqueue);
-	taskqueue_start_threads(&ctrlr->taskqueue, 2, PI_DISK, "nvme taskq");
+	taskqueue_start_threads(&ctrlr->taskqueue, 3, PI_DISK, "nvme taskq");
 
 	ctrlr->is_resetting = 0;
 	ctrlr->is_initialized = 0;
 	ctrlr->notification_sent = 0;
 	TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
 	TASK_INIT(&ctrlr->fail_req_task, 0, nvme_ctrlr_fail_req_task, ctrlr);
+	TASK_INIT(&ctrlr->update_ns_task, 0, nvme_ctrlr_update_ns_req_task, ctrlr);
 	STAILQ_INIT(&ctrlr->fail_req);
+	STAILQ_INIT(&ctrlr->update_ns_req);
 	ctrlr->is_failed = false;
 
 	make_dev_args_init(&md_args);
Index: sys/dev/nvme/nvme_ns.c
===================================================================
--- sys/dev/nvme/nvme_ns.c
+++ sys/dev/nvme/nvme_ns.c
@@ -510,7 +510,7 @@
 
 int
 nvme_ns_construct(struct nvme_namespace *ns, uint32_t id,
-    struct nvme_controller *ctrlr)
+    struct nvme_controller *ctrlr, enum nvme_ctor_reason why)
 {
 	struct make_dev_args md_args;
 	struct nvme_completion_poll_status status;
@@ -549,10 +549,13 @@
 	 * If the size of is zero, chances are this isn't a valid
 	 * namespace (eg one that's not been configured yet).  The
 	 * standard says the entire id will be zeros, so this is a
-	 * cheap way to test for that.
+	 * cheap way to test for that.  If we previously added this
+	 * device, then it's now gone.
 	 */
-	if (ns->data.nsze == 0)
-		return (ENXIO);
+	if (ns->data.nsze == 0) {
+		ns->flags |= NVME_NS_FLAG_GONE;
+		return ((ns->flags & NVME_NS_FLAG_ADDED) ? 0 : ENXIO);
+	}
 
 	flbas_fmt = (ns->data.flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) &
 	    NVME_NS_DATA_FLBAS_FORMAT_MASK;
@@ -597,10 +600,14 @@
 
 	/*
 	 * cdev may have already been created, if we are reconstructing the
-	 * namespace after a controller-level reset.
+	 * namespace after a controller-level reset.  If not, then flag this
+	 * ns as changed for notification.
	 */
-	if (ns->cdev != NULL)
+	if (ns->cdev != NULL) {
+		if (why != NVME_REASON_RESET)
+			ns->flags |= NVME_NS_FLAG_CHANGED;
 		return (0);
+	}
 
 	/*
 	 * Namespace IDs start at 1, so we need to subtract 1 to create a
@@ -619,6 +626,7 @@
 		return (ENXIO);
 
 	ns->cdev->si_flags |= SI_UNMAPPED;
+	ns->flags |= NVME_NS_FLAG_ADDED;
 
 	return (0);
 }
Index: sys/dev/nvme/nvme_private.h
===================================================================
--- sys/dev/nvme/nvme_private.h
+++ sys/dev/nvme/nvme_private.h
@@ -222,10 +222,14 @@
 	struct nvme_namespace_data	data;
 	uint32_t			id;
 	uint32_t			flags;
+#define	NVME_NS_FLAG_ADDED	0x1
+#define	NVME_NS_FLAG_CHANGED	0x2
+#define	NVME_NS_FLAG_GONE	0x4
 	struct cdev			*cdev;
 	void				*cons_cookie[NVME_MAX_CONSUMERS];
 	uint32_t			boundary;
 	struct mtx			lock;
+	STAILQ_ENTRY(nvme_namespace)	stailq;
 };
 
 /*
@@ -267,6 +271,7 @@
 
 	struct task		reset_task;
 	struct task		fail_req_task;
+	struct task		update_ns_task;
 	struct taskqueue	*taskqueue;
 
 	/* For shared legacy interrupt. */
@@ -317,6 +322,7 @@
 	bool			is_failed;
 	bool			is_dying;
 	STAILQ_HEAD(, nvme_request)	fail_req;
+	STAILQ_HEAD(, nvme_namespace)	update_ns_req;
 
 	/* Host Memory Buffer */
 	int			hmb_nchunks;
@@ -333,6 +339,11 @@
 	uint64_t		hmb_desc_paddr;
 };
 
+enum nvme_ctor_reason {
+	NVME_REASON_RESET,	/* Controller was reset, rebuilding */
+	NVME_REASON_FLAGGED,	/* NS was flagged as changed somehow */
+};
+
 #define nvme_mmio_offsetof(reg)		\
 	offsetof(struct nvme_registers, reg)
 
@@ -442,7 +453,7 @@
 void	nvme_io_qpair_destroy(struct nvme_qpair *qpair);
 
 int	nvme_ns_construct(struct nvme_namespace *ns, uint32_t id,
-			  struct nvme_controller *ctrlr);
+			  struct nvme_controller *ctrlr, enum nvme_ctor_reason why);
 void	nvme_ns_destruct(struct nvme_namespace *ns);
 
 void	nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr);
Index: sys/dev/nvme/nvme_sim.c
===================================================================
--- sys/dev/nvme/nvme_sim.c
+++ sys/dev/nvme/nvme_sim.c
@@ -326,25 +326,47 @@
 nvme_sim_ns_change(struct nvme_namespace *ns, void *sc_arg)
 {
 	struct nvme_sim_softc *sc = sc_arg;
+	struct cam_path *tmppath;
 	union ccb	*ccb;
 
+	if (xpt_create_path(&tmppath, /*periph*/NULL,
+	    cam_sim_path(sc->s_sim), 0, ns->id) != CAM_REQ_CMP) {
+		printf("unable to create path for rescan\n");
+		return (NULL);
+	}
+	/*
+	 * If it's gone, then signal that and leave.
+	 */
+	if (ns->flags & NVME_NS_FLAG_GONE) {
+		xpt_async(AC_LOST_DEVICE, tmppath, NULL);
+		xpt_free_path(tmppath);
+		return (sc_arg);
+	}
+
+	/*
+	 * If it's changed, then signal that and leave.
+	 */
+	if (ns->flags & NVME_NS_FLAG_CHANGED) {
+		xpt_async(AC_GETDEV_CHANGED, tmppath, NULL);
+		xpt_free_path(tmppath);
+		return (sc_arg);
+	}
+
 	ccb = xpt_alloc_ccb_nowait();
 	if (ccb == NULL) {
 		printf("unable to alloc CCB for rescan\n");
+		xpt_free_path(tmppath);
 		return (NULL);
 	}
+	ccb->ccb_h.path = tmppath;
 
 	/*
 	 * We map the NVMe namespace idea onto the CAM unit LUN. For
 	 * each new namespace, we create a new CAM path for it. We then
 	 * rescan the path to get it to enumerate.
+	 *
+	 * At the end of the scan, the path is freed, I think...
 	 */
-	if (xpt_create_path(&ccb->ccb_h.path, /*periph*/NULL,
-	    cam_sim_path(sc->s_sim), 0, ns->id) != CAM_REQ_CMP) {
-		printf("unable to create path for rescan\n");
-		xpt_free_ccb(ccb);
-		return (NULL);
-	}
 	xpt_rescan(ccb);
 
 	return (sc_arg);
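
Taken together, the changes above define a small contract between the nvme core and its consumers: nvme_ns_construct() now sets NVME_NS_FLAG_GONE when a namespace's identify data comes back with nsze == 0, sets NVME_NS_FLAG_CHANGED when an already-created namespace is re-identified for a reason other than a controller reset, and leaves both bits clear for a brand-new namespace; nvme_notify_ns() then hands the namespace to each consumer's namespace callback. The skeleton below is a sketch only, not part of the patch: it shows how a hypothetical consumer callback (here called example_ns_change) might dispatch on those flags, the same way nvd_ns_change() and nvme_sim_ns_change() do above. It assumes the struct nvme_namespace layout and the NVME_NS_FLAG_* bits from nvme_private.h are visible to the consumer; the per-branch work is left as comments.

/*
 * Sketch only (not part of the patch): a hypothetical consumer's
 * namespace callback dispatching on the new flags.
 */
static void *
example_ns_change(struct nvme_namespace *ns, void *ctrlr_arg)
{

	if (ns->flags & NVME_NS_FLAG_GONE) {
		/*
		 * Identify data read back with nsze == 0: the namespace
		 * was deleted, so tear down whatever the consumer had
		 * attached to it (cf. AC_LOST_DEVICE in nvme_sim above).
		 */
		return (ctrlr_arg);
	}
	if (ns->flags & NVME_NS_FLAG_CHANGED) {
		/*
		 * An existing namespace was re-identified: refresh the
		 * geometry, e.g. from nvme_ns_get_sector_size(ns) and
		 * nvme_ns_get_size(ns), and resize the consumer's device
		 * (cf. nvd_resize() / AC_GETDEV_CHANGED above).
		 */
		return (ctrlr_arg);
	}
	/* Neither flag is set: treat this as a newly created namespace. */
	return (ctrlr_arg);
}

A consumer following this pattern would pass example_ns_change as the namespace function to nvme_register_consumer(), exactly as the nvd change above swaps nvd_new_disk for nvd_ns_change.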