Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F156387059
D28583.id83669.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
11 KB
Referenced Files
None
Subscribers
None
D28583.id83669.diff
View Options
Index: sys/dev/nvme/nvme_ctrlr.c
===================================================================
--- sys/dev/nvme/nvme_ctrlr.c
+++ sys/dev/nvme/nvme_ctrlr.c
@@ -232,7 +232,8 @@
mtx_lock(&ctrlr->lock);
STAILQ_INSERT_TAIL(&ctrlr->fail_req, req, stailq);
mtx_unlock(&ctrlr->lock);
- taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->fail_req_task);
+ if (!ctrlr->is_dying)
+ taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->fail_req_task);
}
static void
@@ -433,7 +434,8 @@
*/
return;
- taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
+ if (!ctrlr->is_dying)
+ taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
}
static int
@@ -1464,6 +1466,8 @@
{
int gone, i;
+ ctrlr->is_dying = true;
+
if (ctrlr->resource == NULL)
goto nores;
@@ -1483,6 +1487,9 @@
if (ctrlr->cdev)
destroy_dev(ctrlr->cdev);
+ if (ctrlr->taskqueue)
+ taskqueue_free(ctrlr->taskqueue);
+
if (ctrlr->is_initialized) {
if (!gone) {
if (ctrlr->hmb_nchunks > 0)
@@ -1511,9 +1518,6 @@
if (!gone)
nvme_ctrlr_disable(ctrlr);
- if (ctrlr->taskqueue)
- taskqueue_free(ctrlr->taskqueue);
-
if (ctrlr->tag)
bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);
Index: sys/dev/nvme/nvme_private.h
===================================================================
--- sys/dev/nvme/nvme_private.h
+++ sys/dev/nvme/nvme_private.h
@@ -151,7 +151,7 @@
TAILQ_ENTRY(nvme_tracker) tailq;
struct nvme_request *req;
struct nvme_qpair *qpair;
- struct callout timer;
+ sbintime_t deadline;
bus_dmamap_t payload_dma_map;
uint16_t cid;
@@ -159,6 +159,12 @@
bus_addr_t prp_bus_addr;
};
+enum nvme_recovery {
+ RECOVERY_NONE = 0, /* Normal operations */
+ RECOVERY_START, /* Deadline has passed, start recovering */
+ RECOVERY_RESET, /* This pass, initiate reset of controller */
+ RECOVERY_WAITING, /* waiting for the reset to complete */
+};
struct nvme_qpair {
struct nvme_controller *ctrlr;
uint32_t id;
@@ -170,6 +176,12 @@
struct resource *res;
void *tag;
+ struct callout timer;
+ sbintime_t deadline;
+ bool timer_armed;
+ enum nvme_recovery recovery_state;
+ int ticks;
+
uint32_t num_entries;
uint32_t num_trackers;
uint32_t sq_tdbl_off;
@@ -201,8 +213,6 @@
struct nvme_tracker **act_tr;
- bool is_enabled;
-
struct mtx lock __aligned(CACHE_LINE_SIZE);
} __aligned(CACHE_LINE_SIZE);
@@ -305,6 +315,7 @@
uint32_t notification_sent;
bool is_failed;
+ bool is_dying;
STAILQ_HEAD(, nvme_request) fail_req;
/* Host Memory Buffer */
Index: sys/dev/nvme/nvme_qpair.c
===================================================================
--- sys/dev/nvme/nvme_qpair.c
+++ sys/dev/nvme/nvme_qpair.c
@@ -452,7 +452,6 @@
}
mtx_lock(&qpair->lock);
- callout_stop(&tr->timer);
if (retry) {
req->retries++;
@@ -479,6 +478,8 @@
req = STAILQ_FIRST(&qpair->queued_req);
STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
_nvme_qpair_submit_request(qpair, req);
+ } else if (TAILQ_EMPTY(&qpair->outstanding_tr)) {
+ qpair->deadline = SBT_MAX;
}
}
@@ -544,7 +545,7 @@
* progress. Ignore the interrupt - any I/O that was associated with
* this interrupt will get retried when the reset is complete.
*/
- if (!qpair->is_enabled)
+ if (qpair->recovery_state != RECOVERY_NONE)
return (false);
bus_dmamap_sync(qpair->dma_tag, qpair->queuemem_map,
@@ -746,6 +747,11 @@
qpair->cpl_bus_addr = queuemem_phys + cmdsz;
prpmem_phys = queuemem_phys + cmdsz + cplsz;
+ callout_init(&qpair->timer, 1);
+ qpair->timer_armed = false;
+ qpair->deadline = SBT_MAX;
+ qpair->recovery_state = RECOVERY_NONE;
+
/*
* Calcuate the stride of the doorbell register. Many emulators set this
* value to correspond to a cache line. However, some hardware has set
@@ -783,7 +789,6 @@
DOMAINSET_PREF(qpair->domain), M_ZERO | M_WAITOK);
bus_dmamap_create(qpair->dma_tag_payload, 0,
&tr->payload_dma_map);
- callout_init(&tr->timer, 1);
tr->cid = i;
tr->qpair = qpair;
tr->prp = (uint64_t *)prp_list;
@@ -813,6 +818,8 @@
{
struct nvme_tracker *tr;
+ callout_drain(&qpair->timer);
+
if (qpair->tag) {
bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);
qpair->tag = NULL;
@@ -892,65 +899,126 @@
}
static void
-nvme_abort_complete(void *arg, const struct nvme_completion *status)
+nvme_qpair_timeout(void *arg)
{
- struct nvme_tracker *tr = arg;
-
- /*
- * If cdw0 == 1, the controller was not able to abort the command
- * we requested. We still need to check the active tracker array,
- * to cover race where I/O timed out at same time controller was
- * completing the I/O.
- */
- if (status->cdw0 == 1 && tr->qpair->act_tr[tr->cid] != NULL) {
- /*
- * An I/O has timed out, and the controller was unable to
- * abort it for some reason. Construct a fake completion
- * status, and then complete the I/O's tracker manually.
- */
- nvme_printf(tr->qpair->ctrlr,
- "abort command failed, aborting command manually\n");
- nvme_qpair_manual_complete_tracker(tr,
- NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 0, ERROR_PRINT_ALL);
- }
-}
-
-static void
-nvme_timeout(void *arg)
-{
- struct nvme_tracker *tr = arg;
- struct nvme_qpair *qpair = tr->qpair;
+ struct nvme_qpair *qpair = arg;
struct nvme_controller *ctrlr = qpair->ctrlr;
+ struct nvme_tracker *tr;
+ struct nvme_tracker *tr_temp;
+ sbintime_t now;
+ bool idle;
uint32_t csts;
uint8_t cfs;
- /*
- * Read csts to get value of cfs - controller fatal status.
- * If no fatal status, try to call the completion routine, and
- * if completes transactions, report a missed interrupt and
- * return (this may need to be rate limited). Otherwise, if
- * aborts are enabled and the controller is not reporting
- * fatal status, abort the command. Otherwise, just reset the
- * controller and hope for the best.
- */
- csts = nvme_mmio_read_4(ctrlr, csts);
- cfs = (csts >> NVME_CSTS_REG_CFS_SHIFT) & NVME_CSTS_REG_CFS_MASK;
- if (cfs == 0 && nvme_qpair_process_completions(qpair)) {
- nvme_printf(ctrlr, "Missing interrupt\n");
- return;
+ mtx_lock(&qpair->lock);
+ idle = TAILQ_EMPTY(&qpair->outstanding_tr);
+ if (idle && qpair->deadline != SBT_MAX) {
+ nvme_printf(ctrlr, "Had to reset deadline from %jd to MAX due to idle\n",
+ qpair->deadline);
+ qpair->deadline = SBT_MAX;
}
- if (ctrlr->enable_aborts && cfs == 0) {
- nvme_printf(ctrlr, "Aborting command due to a timeout.\n");
- nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
- nvme_abort_complete, tr);
- } else {
+again:
+ switch (qpair->recovery_state) {
+ case RECOVERY_NONE:
+ now = getsbinuptime();
+ if (!idle && now > qpair->deadline) {
+ qpair->deadline = SBT_MAX;
+ TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
+ if (now > tr->deadline && tr->deadline != 0) {
+ /*
+ * We're now passed our earliest
+ * deadline. We need to do expensive
+ * things to cope, but next time. Flag
+ * that and close the door to any
+ * further processing.
+ */
+ qpair->recovery_state = RECOVERY_START;
+ nvme_printf(ctrlr, "RECOVERY_START %jd vs %jd %jd\n",
+ (uintmax_t)now, (uintmax_t)qpair->deadline,
+ (uintmax_t)tr->deadline);
+ qpair->deadline = SBT_MAX;
+ break;
+ }
+ if (qpair->deadline > tr->deadline && tr->deadline != 0) {
+ /*
+ * If we haven't started recovery, then
+ * we need to adjust the current
+ * outstanding deadlines. Since we
+ * expect the trackers to be in order of
+ * deadline we print when we have to
+ * adjust the qpair deadline except when
+ * it's SBT_MAX that we set it to above
+ * which is expected for the first one.
+ */
+ if (qpair->deadline != SBT_MAX)
+ nvme_printf(ctrlr, "Adjusting: %jd to %jd\n",
+ (uintmax_t)qpair->deadline,
+ (uintmax_t)tr->deadline);
+ qpair->deadline = tr->deadline;
+ }
+ }
+ }
+ break;
+ case RECOVERY_START:
+ /*
+ * Read csts to get value of cfs - controller fatal status.
+ * If no fatal status, try to call the completion routine, and
+ * if completes transactions, report a missed interrupt and
+ * return (this may need to be rate limited). Otherwise, if
+ * aborts are enabled and the controller is not reporting
+ * fatal status, abort the command. Otherwise, just reset the
+ * controller and hope for the best.
+ */
+ csts = nvme_mmio_read_4(ctrlr, csts);
+ cfs = (csts >> NVME_CSTS_REG_CFS_SHIFT) & NVME_CSTS_REG_CFS_MASK;
+ if (cfs) {
+ nvme_printf(ctrlr, "Controller in fatal status, resetting\n");
+ qpair->recovery_state = RECOVERY_RESET;
+ goto again;
+ }
+ mtx_unlock(&qpair->lock);
+ if (nvme_qpair_process_completions(qpair)) {
+ nvme_printf(ctrlr, "Missing interrupt\n");
+ qpair->recovery_state = RECOVERY_NONE;
+ // XXX do I need a NOP here to not race?
+ } else {
+ nvme_printf(ctrlr, "missed interrupt with nothing complete\n");
+ qpair->recovery_state = RECOVERY_RESET;
+ mtx_lock(&qpair->lock);
+ goto again;
+ }
+ mtx_lock(&qpair->lock);
+ break;
+ case RECOVERY_RESET:
nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n",
(csts == NVME_GONE) ? " and possible hot unplug" :
(cfs ? " and fatal error status" : ""));
+ nvme_printf(ctrlr, "RECOVERY_WAITING\n");
+ qpair->recovery_state = RECOVERY_WAITING;
nvme_ctrlr_reset(ctrlr);
+ break;
+ case RECOVERY_WAITING:
+ nvme_printf(ctrlr, "waiting\n");
+ break;
}
+
+ /*
+ * Rearm the timeout.
+ */
+ if (!idle) {
+ callout_reset_on(&qpair->timer, qpair->ticks,
+ nvme_qpair_timeout, qpair, qpair->cpu);
+ } else {
+ qpair->timer_armed = false;
+ qpair->deadline = SBT_MAX;
+ }
+ mtx_unlock(&qpair->lock);
}
+/*
+ * Submit the tracker to the hardware. Must already be in the
+ * outstanding queue when called.
+ */
void
nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{
@@ -967,12 +1035,18 @@
if (req->timeout) {
if (req->cb_fn == nvme_completion_poll_cb)
- timeout = hz;
+ timeout = 1;
else
- timeout = ctrlr->timeout_period * hz;
- callout_reset_on(&tr->timer, timeout, nvme_timeout, tr,
- qpair->cpu);
+ timeout = ctrlr->timeout_period;
+ tr->deadline = getsbinuptime() + timeout * SBT_1S;
+ if (!qpair->timer_armed) {
+ qpair->ticks = hz / 2;
+ qpair->timer_armed = true;
+ callout_reset_on(&qpair->timer, qpair->ticks,
+ nvme_qpair_timeout, qpair, qpair->cpu);
+ }
}
+ qpair->deadline = TAILQ_FIRST(&qpair->outstanding_tr)->deadline;
/* Copy the command from the tracker to the submission queue. */
memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));
@@ -1047,7 +1121,7 @@
tr = TAILQ_FIRST(&qpair->free_tr);
req->qpair = qpair;
- if (tr == NULL || !qpair->is_enabled) {
+ if (tr == NULL || qpair->recovery_state != RECOVERY_NONE) {
/*
* No tracker is available, or the qpair is disabled due to
* an in-progress controller-level reset or controller
@@ -1076,6 +1150,8 @@
TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
TAILQ_INSERT_TAIL(&qpair->outstanding_tr, tr, tailq);
+ if (!qpair->timer_armed)
+ tr->deadline = SBT_MAX;
tr->req = req;
switch (req->type) {
@@ -1144,8 +1220,9 @@
static void
nvme_qpair_enable(struct nvme_qpair *qpair)
{
+ mtx_assert(&qpair->lock, MA_OWNED);
- qpair->is_enabled = true;
+ qpair->recovery_state = RECOVERY_NONE;
}
void
@@ -1188,7 +1265,9 @@
NVME_SC_ABORTED_BY_REQUEST, DO_NOT_RETRY, ERROR_PRINT_ALL);
}
+ mtx_lock(&qpair->lock);
nvme_qpair_enable(qpair);
+ mtx_unlock(&qpair->lock);
}
void
@@ -1231,12 +1310,9 @@
static void
nvme_qpair_disable(struct nvme_qpair *qpair)
{
- struct nvme_tracker *tr;
- qpair->is_enabled = false;
mtx_lock(&qpair->lock);
- TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
- callout_stop(&tr->timer);
+ qpair->recovery_state = RECOVERY_WAITING;
mtx_unlock(&qpair->lock);
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, May 14, 6:04 AM (12 h, 2 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33032989
Default Alt Text
D28583.id83669.diff (11 KB)
Attached To
Mode
D28583: nvme: Use shared timeout rather than timeout per transaction
Attached
Detach File
Event Timeline
Log In to Comment