Index: sys/dev/nvme/nvme_private.h
===================================================================
--- sys/dev/nvme/nvme_private.h
+++ sys/dev/nvme/nvme_private.h
@@ -142,6 +142,7 @@
 	uint32_t			type;
 	uint32_t			payload_size;
 	bool				timeout;
+	bool				timed_out;
 	nvme_cb_fn_t			cb_fn;
 	void				*cb_arg;
 	int32_t				retries;
@@ -335,6 +336,8 @@
 	bus_dmamap_t			hmb_desc_map;
 	struct nvme_hmb_desc		*hmb_desc_vaddr;
 	uint64_t			hmb_desc_paddr;
+
+	uint64_t			unwedges;
 };
 
 #define nvme_mmio_offsetof(reg)						       \
Index: sys/dev/nvme/nvme_qpair.c
===================================================================
--- sys/dev/nvme/nvme_qpair.c
+++ sys/dev/nvme/nvme_qpair.c
@@ -875,6 +875,18 @@
 	nvme_qpair_destroy(qpair);
 }
 
+static void
+nvme_unwedge_complete(void *arg, const struct nvme_completion *status)
+{
+	struct nvme_controller	*ctrlr = arg;
+
+	if (nvme_completion_is_error(status)) {
+		nvme_printf(ctrlr, "Unwedge command failed, resetting.\n");
+		nvme_ctrlr_reset(ctrlr);
+	}
+}
+
+
 static void
 nvme_abort_complete(void *arg, const struct nvme_completion *status)
 {
@@ -903,36 +915,83 @@
 nvme_timeout(void *arg)
 {
 	struct nvme_tracker	*tr = arg;
+	struct nvme_request	*req = tr->req;
 	struct nvme_qpair	*qpair = tr->qpair;
 	struct nvme_controller	*ctrlr = qpair->ctrlr;
 	uint32_t		csts;
 	uint8_t			cfs;
 
-	/*
-	 * Read csts to get value of cfs - controller fatal status.
-	 * If no fatal status, try to call the completion routine, and
-	 * if completes transactions, report a missed interrupt and
-	 * return (this may need to be rate limited). Otherwise, if
-	 * aborts are enabled and the controller is not reporting
-	 * fatal status, abort the command. Otherwise, just reset the
-	 * controller and hope for the best.
-	 */
 	csts = nvme_mmio_read_4(ctrlr, csts);
 	cfs = (csts >> NVME_CSTS_REG_CFS_SHIFT) & NVME_CSTS_REG_CFS_MASK;
-	if (cfs == 0 && nvme_qpair_process_completions(qpair)) {
-		nvme_printf(ctrlr, "Missing interrupt\n");
-		return;
-	}
-	if (ctrlr->enable_aborts && cfs == 0) {
-		nvme_printf(ctrlr, "Aborting command due to a timeout.\n");
-		nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
-		    nvme_abort_complete, tr);
-	} else {
+	/*
+	 * If this command has previously timed out, then we've sent the
+	 * unwedge command already and it didn't work for some reason.
+	 *
+	 * If the cfs isn't 0, then the controller status is reporting
+	 * as fatal, which means no good can come from waiting further.
+	 *
+	 * In all these cases just reset and return (unless aborts
+	 * are enabled for a timeout, in which case send an abort).
+	 *
+	 * Note: if the command times out, the drive might legitimately just be
+	 * slow. We don't try to do any adaptive things to increase the timeout
+	 * for slow drives. They should have their timeout bumped by the system
+	 * administrator.
+	 */
+	if (req->timed_out || cfs != 0) {
+		if (cfs == 0 && req->timed_out && ctrlr->enable_aborts) {
+			nvme_printf(ctrlr, "Aborting command due to a timeout.\n");
+			nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
+			    nvme_abort_complete, tr);
+			return;
+		}
 		nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n",
 		    (csts == 0xffffffff) ? " and possible hot unplug" :
 		    (cfs ? " and fatal error status" : ""));
 		nvme_ctrlr_reset(ctrlr);
+		return;
+	}
+
+	/*
+	 * The controller hasn't failed. If this is an unwedge that timed out,
+	 * though, then we tried to send a trivial command to the card and it
+	 * timed out as well. All we can do is reset the controller and hope for
+	 * the best. This gives us better diagnostics, though, before we hit the
+	 * big red panic button.
+	 */
+	if (req->cb_fn == nvme_unwedge_complete) {
+		nvme_printf(ctrlr, "Unwedge attempted timed out, resetting.\n");
+		nvme_ctrlr_reset(ctrlr);
+		return;
+	}
+
+	/*
+	 * Next, we could try to send an abort and have it fail to send at all,
+	 * so if that happens, we reset as well. Note, even though we send the
+	 * command, if we have no trackers available for the request, then it
+	 * gets queued and may be delayed. It isn't 100% that we can't send the
+	 * unwedge command itself, but since it's to the administrative queue,
+	 * not the qpair of the original request, that's likely the result of a
+	 * crapton of these commands being sent.
+	 */
+	if (req->cb_fn == nvme_abort_complete) {
+		nvme_printf(ctrlr, "Command abort timed out, resetting.\n");
+		nvme_ctrlr_reset(ctrlr);
+		return;
 	}
+
+	/*
+	 * OK. If we get this far, this is the first time into the timeout and
+	 * it's nothing otherwise special. Note that this request has timed out
+	 * and reset the timeout to the recovery timeout value. Then send a
+	 * boring unwedge command to see if that gets things going again.
+	 * The above code detects when the unwedge code has failed.
+	 */
+	ctrlr->unwedges++;
+	req->timed_out = true;
+	callout_reset_on(&tr->timer, 5 * hz, nvme_timeout, tr, qpair->cpu);
+	nvme_ctrlr_cmd_get_feature(ctrlr, NVME_FEAT_ARBITRATION, 0,
+	    NULL, 0, nvme_unwedge_complete, ctrlr);
 }
 
 void