diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -175,6 +175,7 @@ int64_t num_failures; int64_t num_ignored; int64_t num_recovery_nolock; + int64_t num_soft_timeouts; struct nvme_command *cmd; struct nvme_completion *cpl; diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c --- a/sys/dev/nvme/nvme_qpair.c +++ b/sys/dev/nvme/nvme_qpair.c @@ -1025,7 +1025,7 @@ struct nvme_tracker *tr; sbintime_t now; bool idle = false; - bool needs_reset; + bool needs_reset, try_isr; uint32_t csts; uint8_t cfs; @@ -1074,12 +1074,6 @@ if (csts == NVME_GONE || cfs == 1) goto do_reset; - /* - * Process completions. We already have the recovery lock, so - * call the locked version. - */ - _nvme_qpair_process_completions(qpair); - /* * Check to see if we need to timeout any commands. If we do, then * we also enter a recovery phase. @@ -1087,20 +1081,37 @@ now = getsbinuptime(); needs_reset = false; idle = true; + try_isr = false; mtx_lock(&qpair->lock); TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) { + sbintime_t softdelta; + /* * Skip async commands, they are posted to the card for * an indefinite amount of time and have no deadline. */ if (tr->deadline == SBT_MAX) continue; + + /* + * Soft timeout. Applies to I/O queues and, as coded, the + * admin queue too (with admin_timeout_period). We start + * after the startup, so we can't see instant commands. + * A command soft times out once 1% of its full timeout + * has elapsed (softdelta is 99% of it). A soft timeout + * only runs the ISR completion routine, nothing else. + */ + if (qpair->id == 0) + softdelta = ctrlr->admin_timeout_period * SBT_1S * 99 / 100; + else + softdelta = ctrlr->timeout_period * SBT_1S * 99 / 100; + if (now > tr->deadline) { if (tr->req->cb_fn != nvme_abort_complete && ctrlr->enable_aborts) { /* * This isn't an abort command, ask * for a hardware abort. + * How to avoid duplicates? 
*/ nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id, nvme_abort_complete, tr); @@ -1113,12 +1124,28 @@ * a reset. */ needs_reset = true; + break; } + } else if (now > tr->deadline - softdelta) { + try_isr = true; + idle = false; + break; } else { idle = false; + break; } } mtx_unlock(&qpair->lock); + + if (try_isr) { + /* + * Process completions. We already have the recovery lock, so + * call the locked version. + */ + _nvme_qpair_process_completions(qpair); + qpair->num_soft_timeouts++; + } + if (!needs_reset) break; diff --git a/sys/dev/nvme/nvme_sysctl.c b/sys/dev/nvme/nvme_sysctl.c --- a/sys/dev/nvme/nvme_sysctl.c +++ b/sys/dev/nvme/nvme_sysctl.c @@ -164,6 +164,7 @@ qpair->num_failures = 0; qpair->num_ignored = 0; qpair->num_recovery_nolock = 0; + qpair->num_soft_timeouts = 0; } static int @@ -256,6 +257,21 @@ return (sysctl_handle_64(oidp, &num, 0, req)); } +static int +nvme_sysctl_num_soft_timeouts(SYSCTL_HANDLER_ARGS) +{ + struct nvme_controller *ctrlr = arg1; + int64_t num; + int i; + + num = ctrlr->adminq.num_soft_timeouts; + + for (i = 0; i < ctrlr->num_io_queues; i++) + num += ctrlr->ioq[i].num_soft_timeouts; + + return (sysctl_handle_64(oidp, &num, 0, req)); +} + static int nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS) { @@ -317,6 +333,9 @@ SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_recovery_nolock", CTLFLAG_RD, &qpair->num_recovery_nolock, "Number of times that we failed to lock recovery in the ISR"); + SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_soft_timeouts", + CTLFLAG_RD, &qpair->num_soft_timeouts, + "Number of times request took long enough for us to fallback to calling ISR directly"); SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO, "dump_debug", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, @@ -395,6 +414,11 @@ ctrlr, 0, nvme_sysctl_num_recovery_nolock, "IU", "Number of times that we failed to lock recovery in the ISR"); + SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, + "num_soft_timeouts", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, 
+ ctrlr, 0, nvme_sysctl_num_soft_timeouts, "IU", + "Number of times request took long enough for us to fallback to calling ISR directly"); + SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, "reset_stats", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr, 0, nvme_sysctl_reset_stats, "IU", "Reset statistics to zero");