Index: sys/dev/nvme/nvme_qpair.c
===================================================================
--- sys/dev/nvme/nvme_qpair.c
+++ sys/dev/nvme/nvme_qpair.c
@@ -536,6 +536,7 @@
 	struct nvme_completion	cpl;
 	int done = 0;
 	bool in_panic = dumping || SCHEDULER_STOPPED();
+	bool first = true;
 
 	qpair->num_intr_handler_calls++;
 
@@ -583,13 +584,33 @@
 	}
 
 	while (1) {
+		uint16_t status;
+
+		/*
+		 * We need to do this dance to avoid a race between the host and
+		 * the device where the device overtakes the host while the host
+		 * is reading this record, leaving the status field 'new' and the
+		 * sqhd and sqid potentially stale. If the phase doesn't match,
+		 * that means status hasn't yet been updated and we'll get any
+		 * pending changes next time. It also means that the phase must
+		 * be the same the second time. We have to sync before reading to
+		 * ensure any bouncing completes.
+		 */
+		if (!first) {
+			bus_dmamap_sync(qpair->dma_tag, qpair->queuemem_map,
+			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+		}
+		status = le16toh(atomic_load_acq_16(&qpair->cpl[qpair->cq_head].status));
+		if (NVME_STATUS_GET_P(status) != qpair->phase)
+			break;
+
 		cpl = qpair->cpl[qpair->cq_head];
 
 		/* Convert to host endian */
 		nvme_completion_swapbytes(&cpl);
 
-		if (NVME_STATUS_GET_P(cpl.status) != qpair->phase)
-			break;
+		KASSERT(status == cpl.status, ("status changed between reads"));
+		first = false;
 
 		tr = qpair->act_tr[cpl.cid];
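
As an aside for reviewers unfamiliar with the phase-bit protocol: the sketch below is not part of the patch and not driver code; it is a minimal, self-contained toy program modeling the handshake the new comment describes. The names (toy_cpl, cq_consume, CQ_DEPTH) are invented for illustration, and it deliberately omits the DMA sync, the acquire load, and the completion doorbell update that the real consumer needs.

	/* Toy model of a phase-bit completion queue (illustration only). */
	#include <stdint.h>
	#include <stdio.h>

	#define CQ_DEPTH	4
	#define PHASE(st)	((st) & 0x1)	/* NVMe keeps the phase tag in bit 0 */

	struct toy_cpl {
		uint16_t cid;		/* command identifier */
		uint16_t status;	/* written last by the device; bit 0 = phase */
	};

	static struct toy_cpl cq[CQ_DEPTH];	/* completion ring, starts all-zero */
	static uint16_t cq_head;		/* next entry to look at */
	static uint16_t phase = 1;		/* phase value that marks a new entry */

	/*
	 * Consume entries until the phase tag of the head entry no longer
	 * matches the expected phase, i.e. until we catch up with the device.
	 */
	static int
	cq_consume(void)
	{
		int done = 0;

		for (;;) {
			uint16_t status = cq[cq_head].status;

			if (PHASE(status) != phase)
				break;		/* entry not (fully) written yet */

			/* Phase matched, so the rest of the entry is valid. */
			printf("completed cid %u, status 0x%x\n",
			    (unsigned)cq[cq_head].cid, (unsigned)status);
			done++;

			/* Wrapping around flips the phase we expect next lap. */
			if (++cq_head == CQ_DEPTH) {
				cq_head = 0;
				phase = !phase;
			}
		}
		return (done);
	}

	int
	main(void)
	{
		/* Pretend the device posted two completions with the phase bit set. */
		cq[0] = (struct toy_cpl){ .cid = 7, .status = 0x1 };
		cq[1] = (struct toy_cpl){ .cid = 9, .status = 0x1 };

		printf("consumed %d entries\n", cq_consume());
		return (0);
	}

The patch's extra re-read and KASSERT cover what the toy model glosses over: once the phase tag matches, the device must not touch that slot again until the host advances past it, so the status read before the struct copy and the status in the copied record have to agree.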