diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -1201,22 +1201,37 @@
 {
 	struct nvme_controller *ctrlr = arg;
 	int status;
+	bool force_fail = false;
 
 	nvme_ctrlr_devctl_log(ctrlr, "RESET", "resetting controller");
+
+	/*
+	 * Make sure that all ISRs are done before proceeding with the reset,
+	 * and also keep any stray interrupts from causing damage. If we time
+	 * out on any of these, then pause 100ms and fail the controller since
+	 * ISRs shouldn't be 'stuck'.
+	 */
+	force_fail |= nvme_qpair_block_isr(&ctrlr->adminq);
+	for (int i = 0; i < ctrlr->num_io_queues; i++)
+		force_fail |= nvme_qpair_block_isr(&ctrlr->ioq[i]);
+	if (force_fail)
+		pause("nvmeR", hz / 10);
+
 	status = nvme_ctrlr_hw_reset(ctrlr);
+
 	/*
-	 * Use pause instead of DELAY, so that we yield to any nvme interrupt
-	 * handlers on this CPU that were blocked on a qpair lock. We want
-	 * all nvme interrupts completed before proceeding with restarting the
-	 * controller.
-	 *
-	 * XXX - any way to guarantee the interrupt handlers have quiesced?
+	 * Now that we've reset, allow ISRs so we can start up the controller
+	 * again.
 	 */
-	pause("nvmereset", hz / 10);
-	if (status == 0)
+	nvme_qpair_unblock_isr(&ctrlr->adminq);
+	for (int i = 0; i < ctrlr->num_io_queues; i++)
+		nvme_qpair_unblock_isr(&ctrlr->ioq[i]);
+
+	if (status == 0 && !force_fail) {
 		nvme_ctrlr_start(ctrlr, true);
-	else
+	} else {
 		nvme_ctrlr_fail(ctrlr);
+	}
 
 	atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
 }
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -428,6 +428,8 @@
 void	nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
 					   struct nvme_request *req,
 					   uint32_t sct, uint32_t sc);
+bool	nvme_qpair_block_isr(struct nvme_qpair *qpair);
+void	nvme_qpair_unblock_isr(struct nvme_qpair *qpair);
 
 void	nvme_admin_qpair_enable(struct nvme_qpair *qpair);
 void	nvme_admin_qpair_disable(struct nvme_qpair *qpair);
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -530,6 +530,47 @@
 	nvme_free_request(req);
 }
 
+/*
+ * Claim the qpair's in_isr flag so that interrupts arriving afterwards are
+ * treated as no-ops.  Waits up to roughly hz / 10 ticks (about 100ms) for a
+ * running ISR to release the flag.
+ *
+ * Returns true if the wait timed out, false if the flag was claimed or if we
+ * are dumping / the scheduler is stopped (in which case nothing is done).
+ */
+bool
+nvme_qpair_block_isr(struct nvme_qpair *qpair)
+{
+	bool in_panic = dumping || SCHEDULER_STOPPED();
+	int to = hz / 10;
+
+	/*
+	 * Set that we're in the interrupt and return. This will make any
+	 * interrupts that happen after this point nops.
+	 */
+	if (in_panic)
+		return (false);
+	/*
+	 * Count down the local timeout, never the global hz: 'to' only goes
+	 * negative when every retry has been used up.
+	 */
+	while (!atomic_cmpset_32(&qpair->in_isr, 0, 1) && to-- > 0)
+		pause("nvmeQB", 1);
+	return (to < 0);
+}
+
+/*
+ * Release the in_isr flag taken by nvme_qpair_block_isr().
+ */
+void
+nvme_qpair_unblock_isr(struct nvme_qpair *qpair)
+{
+	/*
+	 * Say we're out of the ISR so that the ISRs will function again.
+	 */
+	atomic_store_32(&qpair->in_isr, 0);
+}
+
 bool
 nvme_qpair_process_completions(struct nvme_qpair *qpair)
 {