D20478.id58113.diff

Index: sys/dev/nvme/nvme_qpair.c
===================================================================
--- sys/dev/nvme/nvme_qpair.c
+++ sys/dev/nvme/nvme_qpair.c
@@ -31,6 +31,8 @@
#include <sys/param.h>
#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
#include <dev/pci/pcivar.h>
@@ -308,7 +310,7 @@
}
static void
-nvme_qpair_print_completion(struct nvme_qpair *qpair,
+nvme_qpair_print_completion(struct nvme_qpair *qpair,
struct nvme_completion *cpl)
{
uint16_t sct, sc;
@@ -479,18 +481,30 @@
struct nvme_tracker *tr;
struct nvme_completion cpl;
int done = 0;
+ bool in_panic = dumping || SCHEDULER_STOPPED();
qpair->num_intr_handler_calls++;
+ /*
+ * qpair is not enabled, likely because a controller reset is in
+ * progress. Ignore the interrupt - any I/O that was associated with
+ * this interrupt will get retried when the reset is complete.
+ */
if (!qpair->is_enabled)
- /*
- * qpair is not enabled, likely because a controller reset is
- * is in progress. Ignore the interrupt - any I/O that was
- * associated with this interrupt will get retried when the
- * reset is complete.
- */
return (false);
+ /*
+ * A panic can stop the CPU this routine is running on at any point. If
+ * we're called during a panic, complete the cq_head wrap protocol for
+ * the case where we are interrupted just after the increment at 1
+ * below, but before we can reset cq_head to zero at 2.
+ */
+ if (__predict_false(in_panic) && qpair->cq_head == qpair->num_entries) {
+ qpair->cq_head = 0;
+ qpair->phase = !qpair->phase;
+ }
+
+
bus_dmamap_sync(qpair->dma_tag, qpair->queuemem_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
while (1) {
@@ -499,6 +513,14 @@
/* Convert to host endian */
nvme_completion_swapbytes(&cpl);
+ /*
+ * If we're polling for completion due to a panic, there is one
+ * hole: If the thread the CPU stopped on execute 2 below, but
+ * not 3, then we'll have the wrong phase and stop. However, if
+ * it did and there's not actually any new completions, we'll
+ * also have the wrong phase and stop. Since it's hard to guard
+ * against this case, we just document it.
+ */
if (NVME_STATUS_GET_P(cpl.status) != qpair->phase)
break;
@@ -508,17 +530,33 @@
nvme_qpair_complete_tracker(qpair, tr, &cpl, ERROR_PRINT_ALL);
qpair->sq_head = cpl.sqhd;
done++;
- } else {
- nvme_printf(qpair->ctrlr,
+ } else if (!in_panic) {
+ /*
+ * A missing tracker is normally an error. However, a
+ * panic can stop the CPU this routine is running on
+ * after completing an I/O but before updating
+ * qpair->cq_head at 1 below. Later, we re-enter this
+ * routine to poll I/O associated with the kernel
+ * dump. Don't panic in that case: we need the kernel
+ * dump more.
+ */
+ nvme_printf(qpair->ctrlr,
"cpl does not map to outstanding cmd\n");
/* nvme_dump_completion expects device endianness */
nvme_dump_completion(&qpair->cpl[qpair->cq_head]);
KASSERT(0, ("received completion for unknown cmd\n"));
}
- if (++qpair->cq_head == qpair->num_entries) {
- qpair->cq_head = 0;
- qpair->phase = !qpair->phase;
+ /*
+ * There are a number of races with the following (see above) when
+ * the system panics. They might be solved with atomics, but that
+ * would slow down normal completions too much just to make
+ * kernel panics more reliable, which is a poor tradeoff for this
+ * driver.
+ */
+ if (++qpair->cq_head == qpair->num_entries) { /* 1 */
+ qpair->cq_head = 0; /* 2 */
+ qpair->phase = !qpair->phase; /* 3 */
}
nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,

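For reference, the completion-queue handshake the comments above refer to can be sketched in isolation. The following is a simplified, illustrative C fragment, not code from the driver: struct cq and cq_poll_once are invented names, the phase tag is assumed to be bit 0 of each entry's status word (the bit NVME_STATUS_GET_P extracts), and tracker completion and the doorbell write are elided.

#include <stdbool.h>
#include <stdint.h>

struct cq {
	volatile uint16_t *entry_status;	/* status word of each CQ entry */
	uint32_t num_entries;
	uint32_t head;
	bool phase;
};

static int
cq_poll_once(struct cq *cq)
{
	int done = 0;

	for (;;) {
		uint16_t status = cq->entry_status[cq->head];

		/* An entry is new only while its phase tag matches ours. */
		if ((status & 1) != (cq->phase ? 1 : 0))
			break;

		/* ... complete the tracker for this entry ... */
		done++;

		if (++cq->head == cq->num_entries) {	/* 1 */
			cq->head = 0;			/* 2 */
			cq->phase = !cq->phase;		/* 3 */
		}
	}
	/* ... write cq->head to the completion queue doorbell ... */
	return (done);
}

A panic that stops the consumer between steps 1 and 2 leaves head equal to num_entries, the transient state the patch's pre-loop check repairs; a stop between 2 and 3 leaves the recorded phase stale, which is the hole the patch documents rather than closes.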