Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F143935073
D46026.id141089.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
4 KB
Referenced Files
None
Subscribers
None
D46026.id141089.diff
View Options
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -1037,8 +1037,8 @@
struct nvme_controller *ctrlr = qpair->ctrlr;
struct nvme_tracker *tr;
sbintime_t now;
- bool idle = false;
- bool needs_reset;
+ bool idle = true;
+ bool fast;
uint32_t csts;
uint8_t cfs;
@@ -1084,23 +1084,69 @@
*/
csts = nvme_mmio_read_4(ctrlr, csts);
cfs = NVMEV(NVME_CSTS_REG_CFS, csts);
- if (csts == NVME_GONE || cfs == 1)
- goto do_reset;
+ if (csts == NVME_GONE || cfs == 1) {
+ /*
+ * We've had a command timeout that we weren't able to
+ * abort or we have aborts disabled and any command
+ * timed out.
+ *
+ * If we get here due to a possible surprise hot-unplug
+ * event, then we let nvme_ctrlr_reset confirm and fail
+ * the controller.
+ */
+do_reset:
+ nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n",
+ (csts == 0xffffffff) ? " and possible hot unplug" :
+ (cfs ? " and fatal error status" : ""));
+ qpair->recovery_state = RECOVERY_WAITING;
+ nvme_ctrlr_reset(ctrlr);
+ idle = false;
+ break;
+ }
+
/*
- * Process completions. We already have the recovery lock, so
- * call the locked version.
+ * See if there's any recovery needed. First, do a fast check to
+ * see if anything could have timed out. If not, then skip
+ * everything else.
+ */
+ fast = false;
+ mtx_lock(&qpair->lock);
+ TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) {
+ /*
+ * Skip async commands, they are posted to the card for
+ * an indefinite amount of time and have no deadline.
+ */
+ if (tr->deadline == SBT_MAX)
+ continue;
+
+ /*
+ * If the first real transaction is not in timeout, then
+ * we're done. Otherwise, we try recovery.
+ */
+ idle = false;
+ if (now <= tr->deadline)
+ fast = true;
+ break;
+ }
+ mtx_unlock(&qpair->lock);
+ if (idle || fast)
+ break;
+
+ /*
+ * There's a stale transaction at the start of the queue that's
+ * at least 1/2s old. Call the completion ISR to catch 'missed'
+ * interrupts.
*/
_nvme_qpair_process_completions(qpair);
/*
- * Check to see if we need to timeout any commands. If we do, then
- * we also enter a recovery phase.
+ * Now that we've run the ISR, re-rheck to see if there's any
+ * timed out commands and abort them or reset the card if so.
*/
now = getsbinuptime();
- needs_reset = false;
- idle = true;
mtx_lock(&qpair->lock);
+ idle = true;
TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) {
/*
* Skip async commands, they are posted to the card for
@@ -1108,48 +1154,44 @@
*/
if (tr->deadline == SBT_MAX)
continue;
- if (now > tr->deadline) {
- if (tr->req->cb_fn != nvme_abort_complete &&
- ctrlr->enable_aborts) {
- /*
- * This isn't an abort command, ask
- * for a hardware abort.
- */
- nvme_ctrlr_cmd_abort(ctrlr, tr->cid,
- qpair->id, nvme_abort_complete, tr);
- } else {
- /*
- * Otherwise we have a live command in
- * the card (either one we couldn't
- * abort, or aborts weren't enabled).
- * The only safe way to proceed is to do
- * a reset.
- */
- needs_reset = true;
- }
+
+ /*
+ * If we know this tracker hasn't timed out, we also
+ * know all subsequent ones haven't timed out. The tr
+ * queue is in submission order and all normal commands
+ * in a queue have the same timeout (or the timeout was
+ * changed by the user, but we eventually timeout then).
+ */
+ idle = false;
+ if (now <= tr->deadline)
+ break;
+
+ /*
+ * Timeout expired, abort it or reset controller.
+ */
+ if (ctrlr->enable_aborts &&
+ tr->req->cb_fn != nvme_abort_complete) {
+ /*
+ * This isn't an abort command, ask for a
+ * hardware abort. This goes to the admin
+ * queue which will reset the card if it
+ * times out.
+ */
+ nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
+ nvme_abort_complete, tr);
} else {
- idle = false;
+ /*
+ * We have a live command in the card (either
+ * one we couldn't abort, or aborts weren't
+ * enabled). We can only reset.
+ */
+ mtx_unlock(&qpair->lock);
+ goto do_resest;
}
}
mtx_unlock(&qpair->lock);
- if (!needs_reset)
- break;
-
- /*
- * We've had a command timeout that we weren't able to abort
- *
- * If we get here due to a possible surprise hot-unplug event,
- * then we let nvme_ctrlr_reset confirm and fail the
- * controller.
- */
- do_reset:
- nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n",
- (csts == 0xffffffff) ? " and possible hot unplug" :
- (cfs ? " and fatal error status" : ""));
- qpair->recovery_state = RECOVERY_WAITING;
- nvme_ctrlr_reset(ctrlr);
- idle = false; /* We want to keep polling */
break;
+
case RECOVERY_WAITING:
/*
* These messages aren't interesting while we're suspended. We
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Feb 3, 5:34 AM (10 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28421397
Default Alt Text
D46026.id141089.diff (4 KB)
Attached To
Mode
D46026: nvme: Optimize timeout code further
Attached
Detach File
Event Timeline
Log In to Comment