Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F136910024
D20478.id58113.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
3 KB
Referenced Files
None
Subscribers
None
D20478.id58113.diff
View Options
Index: sys/dev/nvme/nvme_qpair.c
===================================================================
--- sys/dev/nvme/nvme_qpair.c
+++ sys/dev/nvme/nvme_qpair.c
@@ -31,6 +31,8 @@
#include <sys/param.h>
#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
#include <dev/pci/pcivar.h>
@@ -308,7 +310,7 @@
}
static void
-nvme_qpair_print_completion(struct nvme_qpair *qpair,
+nvme_qpair_print_completion(struct nvme_qpair *qpair,
struct nvme_completion *cpl)
{
uint16_t sct, sc;
@@ -479,18 +481,30 @@
struct nvme_tracker *tr;
struct nvme_completion cpl;
int done = 0;
+ bool in_panic = dumping || SCHEDULER_STOPPED();
qpair->num_intr_handler_calls++;
+ /*
+ * qpair is not enabled, likely because a controller reset is is in
+ * progress. Ignore the interrupt - any I/O that was associated with
+ * this interrupt will get retried when the reset is complete.
+ */
if (!qpair->is_enabled)
- /*
- * qpair is not enabled, likely because a controller reset is
- * is in progress. Ignore the interrupt - any I/O that was
- * associated with this interrupt will get retried when the
- * reset is complete.
- */
return (false);
+ /*
+ * A panic can stop the CPU this routine is running on at any point. If
+ * we're called during a panic, complete the cq_hea wrap protocol for
+ * the case where we are interrupted just after the increment at 1
+ * below, but before we can reset cq_head to zero at 2.
+ */
+ if (__predict_false(in_panic) && qpair->cq_head == qpair->num_entries) {
+ qpair->cq_head = 0;
+ qpair->phase = !qpair->phase;
+ }
+
+
bus_dmamap_sync(qpair->dma_tag, qpair->queuemem_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
while (1) {
@@ -499,6 +513,14 @@
/* Convert to host endian */
nvme_completion_swapbytes(&cpl);
+ /*
+ * If we're polling for completion due to a panic, there is one
+ * hole: If the thread the CPU stopped on execute 2 below, but
+ * not 3, then we'll have the wrong phase and stop. However, if
+ * it did and there's not actually any new completions, we'll
+ * also have the wrong phase and stop. Since it's hard to guard
+ * against this case, we just document it.
+ */
if (NVME_STATUS_GET_P(cpl.status) != qpair->phase)
break;
@@ -508,17 +530,33 @@
nvme_qpair_complete_tracker(qpair, tr, &cpl, ERROR_PRINT_ALL);
qpair->sq_head = cpl.sqhd;
done++;
- } else {
- nvme_printf(qpair->ctrlr,
+ } else if (!in_panic) {
+ /*
+ * A missing tracker is normally an error. However, a
+ * panic can stop the CPU this routine is running on
+ * after completing an I/O but before updating
+ * qpair->cq_head at 1 below. Later, we re-enter this
+ * routine to poll I/O associated with the kernel
+ * dump. Don't panic in that case: we need the kernel
+ * dump more.
+ */
+ nvme_printf(qpair->ctrlr,
"cpl does not map to outstanding cmd\n");
/* nvme_dump_completion expects device endianess */
nvme_dump_completion(&qpair->cpl[qpair->cq_head]);
KASSERT(0, ("received completion for unknown cmd\n"));
}
- if (++qpair->cq_head == qpair->num_entries) {
- qpair->cq_head = 0;
- qpair->phase = !qpair->phase;
+ /*
+ * There's a number of races with the following (see above) when
+ * the system panics. They might be solve with atomics, but that
+ * would slow down normal completions too much to make the
+ * kernel panics more reliable, which is a poor tradeoff for this
+ * driver.
+ */
+ if (++qpair->cq_head == qpair->num_entries) { /* 1 */
+ qpair->cq_head = 0; /* 2 */
+ qpair->phase = !qpair->phase; /* 3 */
}
nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Nov 21, 2:49 PM (15 h, 39 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
25771181
Default Alt Text
D20478.id58113.diff (3 KB)
Attached To
Mode
D20478: Since a fatal trap can happen at aribtrary times, don't panic when the completions are not in a consistent state.
Attached
Detach File
Event Timeline
Log In to Comment