diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -150,7 +150,6 @@
 enum nvme_recovery {
 	RECOVERY_NONE = 0,		/* Normal operations */
 	RECOVERY_WAITING,		/* waiting for the reset to complete */
-	RECOVERY_FAILED,		/* We have failed, no more I/O */
 };
 struct nvme_qpair {
 	struct nvme_controller	*ctrlr;
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -1027,6 +1027,17 @@
 
 	mtx_assert(&qpair->recovery, MA_OWNED);
 
+	/*
+	 * If the controller is failed, then stop polling. This ensures that any
+	 * failure processing that races with the qpair timeout will fail
+	 * safely.
+	 */
+	if (qpair->ctrlr->is_failed) {
+		nvme_printf(qpair->ctrlr,
+		    "Failed controller, stopping watchdog timeout.\n");
+		return;
+	}
+
 	switch (qpair->recovery_state) {
 	case RECOVERY_NONE:
 		/*
@@ -1120,11 +1131,6 @@
 			nvme_printf(ctrlr, "Waiting for reset to complete\n");
 		idle = false;		/* We want to keep polling */
 		break;
-	case RECOVERY_FAILED:
-		KASSERT(qpair->ctrlr->is_failed,
-		    ("Recovery state failed w/o failed controller\n"));
-		idle = true;			/* nothing to monitor */
-		break;
 	}
 
 	/*
@@ -1244,11 +1250,21 @@
 	if (tr == NULL || qpair->recovery_state != RECOVERY_NONE) {
 		/*
 		 * No tracker is available, or the qpair is disabled due to an
-		 * in-progress controller-level reset or controller failure. If
-		 * we lose the race with recovery_state, then we may add an
-		 * extra request to the queue which will be resubmitted later.
-		 * We only set recovery_state to NONE with qpair->lock also
-		 * held.
+		 * in-progress controller-level reset. If we lose the race with
+		 * recovery_state, then we may add an extra request to the queue
+		 * which will be resubmitted later.  We only set recovery_state
+		 * to NONE with qpair->lock also held, so if we observe that the
+		 * state is not NONE, we know it can't transition to NONE below
+		 * when we've submitted the request to hardware.
+		 *
+		 * Also, as part of the failure process, we set recovery_state
+		 * to RECOVERY_WAITING, so we check here to see if we've failed
+		 * the controller. We set it before we call the qpair_fail
+		 * functions, which take out the lock lock before messing with
+		 * queued_req. Since we hold that lock, we know it's safe to
+		 * either fail directly, or queue the failure should is_failed
+		 * be stale. If we lose the race reading is_failed, then
+		 * nvme_qpair_fail will fail the queued request.
 		 */
 
 		if (qpair->ctrlr->is_failed) {
@@ -1314,7 +1330,7 @@
 		mtx_assert(&qpair->recovery, MA_OWNED);
 	if (mtx_initialized(&qpair->lock))
 		mtx_assert(&qpair->lock, MA_OWNED);
-	KASSERT(qpair->recovery_state != RECOVERY_FAILED,
+	KASSERT(!qpair->ctrlr->is_failed,
 	    ("Enabling a failed qpair\n"));
 
 	qpair->recovery_state = RECOVERY_NONE;
@@ -1471,10 +1487,6 @@
 	if (!mtx_initialized(&qpair->lock))
 		return;
 
-	mtx_lock(&qpair->recovery);
-	qpair->recovery_state = RECOVERY_FAILED;
-	mtx_unlock(&qpair->recovery);
-
 	mtx_lock(&qpair->lock);
 
 	if (!STAILQ_EMPTY(&qpair->queued_req)) {