Page MenuHomeFreeBSD

D26767.diff
No OneTemporary

D26767.diff

Index: sys/dev/nvme/nvme_private.h
===================================================================
--- sys/dev/nvme/nvme_private.h
+++ sys/dev/nvme/nvme_private.h
@@ -159,7 +159,7 @@
TAILQ_ENTRY(nvme_tracker) tailq;
struct nvme_request *req;
struct nvme_qpair *qpair;
- struct callout timer;
+ sbintime_t deadline;
bus_dmamap_t payload_dma_map;
uint16_t cid;
@@ -167,6 +167,12 @@
bus_addr_t prp_bus_addr;
};
+enum nvme_recovery {
+ RECOVERY_NONE = 0, /* Normal operations */
+ RECOVERY_START, /* Deadline has passed, start recovering */
+ RECOVERY_RESET, /* This pass, initiate reset of controller */
+ RECOVERY_WAITING, /* waiting for the reset to complete */
+};
struct nvme_qpair {
struct nvme_controller *ctrlr;
uint32_t id;
@@ -178,6 +184,12 @@
struct resource *res;
void *tag;
+ struct callout timer;
+ sbintime_t deadline;
+ bool timer_armed;
+ enum nvme_recovery recovery_state;
+ int ticks;
+
uint32_t num_entries;
uint32_t num_trackers;
uint32_t sq_tdbl_off;
@@ -209,8 +221,6 @@
struct nvme_tracker **act_tr;
- bool is_enabled;
-
struct mtx lock __aligned(CACHE_LINE_SIZE);
} __aligned(CACHE_LINE_SIZE);
Index: sys/dev/nvme/nvme_qpair.c
===================================================================
--- sys/dev/nvme/nvme_qpair.c
+++ sys/dev/nvme/nvme_qpair.c
@@ -452,7 +452,6 @@
}
mtx_lock(&qpair->lock);
- callout_stop(&tr->timer);
if (retry) {
req->retries++;
@@ -479,6 +478,8 @@
req = STAILQ_FIRST(&qpair->queued_req);
STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
_nvme_qpair_submit_request(qpair, req);
+ } else if (TAILQ_EMPTY(&qpair->outstanding_tr)) {
+ qpair->deadline = SBT_MAX;
}
}
@@ -544,7 +545,7 @@
* progress. Ignore the interrupt - any I/O that was associated with
* this interrupt will get retried when the reset is complete.
*/
- if (!qpair->is_enabled)
+ if (qpair->recovery_state != RECOVERY_NONE)
return (false);
/*
@@ -741,6 +742,11 @@
qpair->cpl_bus_addr = queuemem_phys + cmdsz;
prpmem_phys = queuemem_phys + cmdsz + cplsz;
+ callout_init(&qpair->timer, 1);
+ qpair->timer_armed = false;
+ qpair->deadline = SBT_MAX;
+ qpair->recovery_state = RECOVERY_NONE;
+
/*
* Calcuate the stride of the doorbell register. Many emulators set this
* value to correspond to a cache line. However, some hardware has set
@@ -778,7 +784,6 @@
DOMAINSET_PREF(qpair->domain), M_ZERO | M_WAITOK);
bus_dmamap_create(qpair->dma_tag_payload, 0,
&tr->payload_dma_map);
- callout_init(&tr->timer, 1);
tr->cid = i;
tr->qpair = qpair;
tr->prp = (uint64_t *)prp_list;
@@ -886,6 +891,7 @@
nvme_qpair_destroy(qpair);
}
+#if 0
static void
nvme_abort_complete(void *arg, const struct nvme_completion *status)
{
@@ -945,7 +951,117 @@
nvme_ctrlr_reset(ctrlr);
}
}
+#else
+static void
+nvme_qpair_timeout(void *arg)
+{
+ struct nvme_qpair *qpair = arg;
+ struct nvme_controller *ctrlr = qpair->ctrlr;
+ struct nvme_tracker *tr;
+ struct nvme_tracker *tr_temp;
+ sbintime_t now;
+ bool idle;
+ uint32_t csts;
+ uint8_t cfs;
+
+ mtx_lock(&qpair->lock);
+ idle = TAILQ_EMPTY(&qpair->outstanding_tr);
+ if (idle && qpair->deadline != SBT_MAX) {
+ nvme_printf(ctrlr, "Had to reset deadline from %jd to MAX due to idle\n",
+ qpair->deadline);
+ qpair->deadline = SBT_MAX;
+ }
+again:
+ switch (qpair->recovery_state) {
+ case RECOVERY_NONE:
+ now = getsbinuptime();
+ if (!idle && now > qpair->deadline) {
+ qpair->deadline = SBT_MAX;
+ TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
+ if (now > tr->deadline) {
+ /*
+ * We're now passed our earliest deadline. We need to do
+ * expensive things to cope, but next time. Flag that and
+ * close the door to any further processing.
+ */
+ qpair->recovery_state = RECOVERY_START;
+ nvme_printf(ctrlr, "RECOVERY_START %jd vs %jd %jd\n",
+ (uintmax_t)now, (uintmax_t)qpair->deadline,
+ (uintmax_t)tr->deadline);
+ qpair->deadline = SBT_MAX;
+ break;
+ }
+ if (qpair->deadline > tr->deadline) {
+ nvme_printf(ctrlr, "Adjusting: %jd to %jd\n",
+ (uintmax_t)qpair->deadline,
+ (uintmax_t)tr->deadline);
+ qpair->deadline = tr->deadline;
+ }
+ }
+ }
+ break;
+ case RECOVERY_START:
+ /*
+ * Read csts to get value of cfs - controller fatal status.
+ * If no fatal status, try to call the completion routine, and
+ * if completes transactions, report a missed interrupt and
+ * return (this may need to be rate limited). Otherwise, if
+ * aborts are enabled and the controller is not reporting
+ * fatal status, abort the command. Otherwise, just reset the
+ * controller and hope for the best.
+ */
+ csts = nvme_mmio_read_4(ctrlr, csts);
+ cfs = (csts >> NVME_CSTS_REG_CFS_SHIFT) & NVME_CSTS_REG_CFS_MASK;
+ if (cfs) {
+ nvme_printf(ctrlr, "Controller in fatal status, resetting\n");
+ qpair->recovery_state = RECOVERY_RESET;
+ goto again;
+ }
+ mtx_unlock(&qpair->lock);
+ if (nvme_qpair_process_completions(qpair)) {
+ nvme_printf(ctrlr, "Missing interrupt\n");
+ qpair->recovery_state = RECOVERY_NONE;
+ // XXX do I need a NOP here to not race?
+ } else {
+ nvme_printf(ctrlr, "missed interrupt with nothing complete\n");
+ qpair->recovery_state = RECOVERY_RESET;
+ mtx_lock(&qpair->lock);
+ goto again;
+ }
+ mtx_lock(&qpair->lock);
+ break;
+ case RECOVERY_RESET:
+ nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n",
+ (csts == 0xffffffff) ? " and possible hot unplug" :
+ (cfs ? " and fatal error status" : ""));
+ nvme_printf(ctrlr, "RECOVERY_WAITING\n");
+ qpair->recovery_state = RECOVERY_WAITING;
+ nvme_ctrlr_reset(ctrlr);
+ break;
+ case RECOVERY_WAITING:
+ nvme_printf(ctrlr, "waiting\n");
+ break;
+ }
+
+ /*
+ * Rearm the timeout.
+ */
+ if (!idle) {
+ callout_reset_on(&qpair->timer, qpair->ticks,
+ nvme_qpair_timeout, qpair, qpair->cpu);
+ } else {
+ qpair->timer_armed = false;
+ qpair->deadline = SBT_MAX;
+ }
+ mtx_unlock(&qpair->lock);
+}
+#endif
+
+/*
+ * Submit the tracker to the hardware. Must already be in the
+ * outstanding queue when called.
+ */
void
nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{
@@ -962,12 +1078,18 @@
if (req->timeout) {
if (req->cb_fn == nvme_completion_poll_cb)
- timeout = hz;
+ timeout = 1;
else
- timeout = ctrlr->timeout_period * hz;
- callout_reset_on(&tr->timer, timeout, nvme_timeout, tr,
- qpair->cpu);
+ timeout = ctrlr->timeout_period;
+ tr->deadline = getsbinuptime() + timeout * SBT_1S;
+ if (!qpair->timer_armed) {
+ qpair->ticks = hz / 2;
+ qpair->timer_armed = true;
+ callout_reset_on(&qpair->timer, qpair->ticks,
+ nvme_qpair_timeout, qpair, qpair->cpu);
+ }
}
+ qpair->deadline = TAILQ_FIRST(&qpair->outstanding_tr)->deadline;
/* Copy the command from the tracker to the submission queue. */
memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));
@@ -1050,7 +1172,7 @@
tr = TAILQ_FIRST(&qpair->free_tr);
req->qpair = qpair;
- if (tr == NULL || !qpair->is_enabled) {
+ if (tr == NULL || qpair->recovery_state != RECOVERY_NONE) {
/*
* No tracker is available, or the qpair is disabled due to
* an in-progress controller-level reset or controller
@@ -1147,8 +1269,9 @@
static void
nvme_qpair_enable(struct nvme_qpair *qpair)
{
+ mtx_assert(&qpair->lock, MA_OWNED);
- qpair->is_enabled = true;
+ qpair->recovery_state = RECOVERY_NONE;
}
void
@@ -1191,7 +1314,9 @@
NVME_SC_ABORTED_BY_REQUEST, DO_NOT_RETRY, ERROR_PRINT_ALL);
}
+ mtx_lock(&qpair->lock);
nvme_qpair_enable(qpair);
+ mtx_unlock(&qpair->lock);
}
void
@@ -1234,12 +1359,9 @@
static void
nvme_qpair_disable(struct nvme_qpair *qpair)
{
- struct nvme_tracker *tr;
- qpair->is_enabled = false;
mtx_lock(&qpair->lock);
- TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
- callout_stop(&tr->timer);
+ qpair->recovery_state = RECOVERY_WAITING;
mtx_unlock(&qpair->lock);
}

File Metadata

Mime Type
text/plain
Expires
Tue, Jan 28, 1:11 AM (9 h, 54 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16231771
Default Alt Text
D26767.diff (7 KB)

Event Timeline