Index: sys/dev/nvme/nvme_private.h =================================================================== --- sys/dev/nvme/nvme_private.h +++ sys/dev/nvme/nvme_private.h @@ -159,7 +159,7 @@ TAILQ_ENTRY(nvme_tracker) tailq; struct nvme_request *req; struct nvme_qpair *qpair; - struct callout timer; + sbintime_t deadline; bus_dmamap_t payload_dma_map; uint16_t cid; @@ -167,6 +167,12 @@ bus_addr_t prp_bus_addr; }; +enum nvme_recovery { + RECOVERY_NONE = 0, /* Normal operations */ + RECOVERY_START, /* Deadline has passed, start recovering */ + RECOVERY_RESET, /* This pass, initiate reset of controller */ + RECOVERY_WAITING, /* waiting for the reset to complete */ +}; struct nvme_qpair { struct nvme_controller *ctrlr; uint32_t id; @@ -178,6 +184,12 @@ struct resource *res; void *tag; + struct callout timer; + sbintime_t deadline; + bool timer_armed; + enum nvme_recovery recovery_state; + int ticks; + uint32_t num_entries; uint32_t num_trackers; uint32_t sq_tdbl_off; @@ -209,8 +221,6 @@ struct nvme_tracker **act_tr; - bool is_enabled; - struct mtx lock __aligned(CACHE_LINE_SIZE); } __aligned(CACHE_LINE_SIZE); Index: sys/dev/nvme/nvme_qpair.c =================================================================== --- sys/dev/nvme/nvme_qpair.c +++ sys/dev/nvme/nvme_qpair.c @@ -452,7 +452,6 @@ } mtx_lock(&qpair->lock); - callout_stop(&tr->timer); if (retry) { req->retries++; @@ -479,6 +478,8 @@ req = STAILQ_FIRST(&qpair->queued_req); STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq); _nvme_qpair_submit_request(qpair, req); + } else if (TAILQ_EMPTY(&qpair->outstanding_tr)) { + qpair->deadline = SBT_MAX; } } @@ -544,7 +545,7 @@ * progress. Ignore the interrupt - any I/O that was associated with * this interrupt will get retried when the reset is complete. */ - if (!qpair->is_enabled) + if (qpair->recovery_state != RECOVERY_NONE) return (false); /* @@ -741,6 +742,11 @@ qpair->cpl_bus_addr = queuemem_phys + cmdsz; prpmem_phys = queuemem_phys + cmdsz + cplsz; + callout_init(&qpair->timer, 1); + qpair->timer_armed = false; + qpair->deadline = SBT_MAX; + qpair->recovery_state = RECOVERY_NONE; + /* * Calcuate the stride of the doorbell register. Many emulators set this * value to correspond to a cache line. However, some hardware has set @@ -778,7 +784,6 @@ DOMAINSET_PREF(qpair->domain), M_ZERO | M_WAITOK); bus_dmamap_create(qpair->dma_tag_payload, 0, &tr->payload_dma_map); - callout_init(&tr->timer, 1); tr->cid = i; tr->qpair = qpair; tr->prp = (uint64_t *)prp_list; @@ -886,6 +891,7 @@ nvme_qpair_destroy(qpair); } +#if 0 static void nvme_abort_complete(void *arg, const struct nvme_completion *status) { @@ -945,7 +951,117 @@ nvme_ctrlr_reset(ctrlr); } } +#else +static void +nvme_qpair_timeout(void *arg) +{ + struct nvme_qpair *qpair = arg; + struct nvme_controller *ctrlr = qpair->ctrlr; + struct nvme_tracker *tr; + struct nvme_tracker *tr_temp; + sbintime_t now; + bool idle; + uint32_t csts; + uint8_t cfs; + + mtx_lock(&qpair->lock); + idle = TAILQ_EMPTY(&qpair->outstanding_tr); + if (idle && qpair->deadline != SBT_MAX) { + nvme_printf(ctrlr, "Had to reset deadline from %jd to MAX due to idle\n", + qpair->deadline); + qpair->deadline = SBT_MAX; + } +again: + switch (qpair->recovery_state) { + case RECOVERY_NONE: + now = getsbinuptime(); + if (!idle && now > qpair->deadline) { + qpair->deadline = SBT_MAX; + TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) { + if (now > tr->deadline) { + /* + * We're now passed our earliest deadline. We need to do + * expensive things to cope, but next time. Flag that and + * close the door to any further processing. + */ + qpair->recovery_state = RECOVERY_START; + nvme_printf(ctrlr, "RECOVERY_START %jd vs %jd %jd\n", + (uintmax_t)now, (uintmax_t)qpair->deadline, + (uintmax_t)tr->deadline); + qpair->deadline = SBT_MAX; + break; + } + if (qpair->deadline > tr->deadline) { + nvme_printf(ctrlr, "Adjusting: %jd to %jd\n", + (uintmax_t)qpair->deadline, + (uintmax_t)tr->deadline); + qpair->deadline = tr->deadline; + } + } + } + break; + case RECOVERY_START: + /* + * Read csts to get value of cfs - controller fatal status. + * If no fatal status, try to call the completion routine, and + * if completes transactions, report a missed interrupt and + * return (this may need to be rate limited). Otherwise, if + * aborts are enabled and the controller is not reporting + * fatal status, abort the command. Otherwise, just reset the + * controller and hope for the best. + */ + csts = nvme_mmio_read_4(ctrlr, csts); + cfs = (csts >> NVME_CSTS_REG_CFS_SHIFT) & NVME_CSTS_REG_CFS_MASK; + if (cfs) { + nvme_printf(ctrlr, "Controller in fatal status, resetting\n"); + qpair->recovery_state = RECOVERY_RESET; + goto again; + } + mtx_unlock(&qpair->lock); + if (nvme_qpair_process_completions(qpair)) { + nvme_printf(ctrlr, "Missing interrupt\n"); + qpair->recovery_state = RECOVERY_NONE; + // XXX do I need a NOP here to not race? + } else { + nvme_printf(ctrlr, "missed interrupt with nothing complete\n"); + qpair->recovery_state = RECOVERY_RESET; + mtx_lock(&qpair->lock); + goto again; + } + mtx_lock(&qpair->lock); + break; + case RECOVERY_RESET: + nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n", + (csts == 0xffffffff) ? " and possible hot unplug" : + (cfs ? " and fatal error status" : "")); + nvme_printf(ctrlr, "RECOVERY_WAITING\n"); + qpair->recovery_state = RECOVERY_WAITING; + nvme_ctrlr_reset(ctrlr); + break; + case RECOVERY_WAITING: + nvme_printf(ctrlr, "waiting\n"); + break; + } + + /* + * Rearm the timeout. + */ + if (!idle) { + callout_reset_on(&qpair->timer, qpair->ticks, + nvme_qpair_timeout, qpair, qpair->cpu); + } else { + qpair->timer_armed = false; + qpair->deadline = SBT_MAX; + } + mtx_unlock(&qpair->lock); +} +#endif + +/* + * Submit the tracker to the hardware. Must already be in the + * outstanding queue when called. + */ void nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr) { @@ -962,12 +1078,18 @@ if (req->timeout) { if (req->cb_fn == nvme_completion_poll_cb) - timeout = hz; + timeout = 1; else - timeout = ctrlr->timeout_period * hz; - callout_reset_on(&tr->timer, timeout, nvme_timeout, tr, - qpair->cpu); + timeout = ctrlr->timeout_period; + tr->deadline = getsbinuptime() + timeout * SBT_1S; + if (!qpair->timer_armed) { + qpair->ticks = hz / 2; + qpair->timer_armed = true; + callout_reset_on(&qpair->timer, qpair->ticks, + nvme_qpair_timeout, qpair, qpair->cpu); + } } + qpair->deadline = TAILQ_FIRST(&qpair->outstanding_tr)->deadline; /* Copy the command from the tracker to the submission queue. */ memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd)); @@ -1050,7 +1172,7 @@ tr = TAILQ_FIRST(&qpair->free_tr); req->qpair = qpair; - if (tr == NULL || !qpair->is_enabled) { + if (tr == NULL || qpair->recovery_state != RECOVERY_NONE) { /* * No tracker is available, or the qpair is disabled due to * an in-progress controller-level reset or controller @@ -1147,8 +1269,9 @@ static void nvme_qpair_enable(struct nvme_qpair *qpair) { + mtx_assert(&qpair->lock, MA_OWNED); - qpair->is_enabled = true; + qpair->recovery_state = RECOVERY_NONE; } void @@ -1191,7 +1314,9 @@ NVME_SC_ABORTED_BY_REQUEST, DO_NOT_RETRY, ERROR_PRINT_ALL); } + mtx_lock(&qpair->lock); nvme_qpair_enable(qpair); + mtx_unlock(&qpair->lock); } void @@ -1234,12 +1359,9 @@ static void nvme_qpair_disable(struct nvme_qpair *qpair) { - struct nvme_tracker *tr; - qpair->is_enabled = false; mtx_lock(&qpair->lock); - TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) - callout_stop(&tr->timer); + qpair->recovery_state = RECOVERY_WAITING; mtx_unlock(&qpair->lock); }