Changeset View
Changeset View
Standalone View
Standalone View
sys/dev/nvme/nvme_qpair.c
Show First 20 Lines • Show All 446 Lines • ▼ Show 20 Lines | if (req->type != NVME_REQUEST_NULL) { | ||||
tr->payload_dma_map, | tr->payload_dma_map, | ||||
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); | BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); | ||||
} | } | ||||
if (req->cb_fn) | if (req->cb_fn) | ||||
req->cb_fn(req->cb_arg, cpl); | req->cb_fn(req->cb_arg, cpl); | ||||
} | } | ||||
mtx_lock(&qpair->lock); | mtx_lock(&qpair->lock); | ||||
callout_stop(&tr->timer); | |||||
if (retry) { | if (retry) { | ||||
req->retries++; | req->retries++; | ||||
nvme_qpair_submit_tracker(qpair, tr); | nvme_qpair_submit_tracker(qpair, tr); | ||||
} else { | } else { | ||||
if (req->type != NVME_REQUEST_NULL) { | if (req->type != NVME_REQUEST_NULL) { | ||||
bus_dmamap_unload(qpair->dma_tag_payload, | bus_dmamap_unload(qpair->dma_tag_payload, | ||||
tr->payload_dma_map); | tr->payload_dma_map); | ||||
Show All 10 Lines | if (retry) { | ||||
* try to submit queued requests here - let the reset logic | * try to submit queued requests here - let the reset logic | ||||
* handle that instead. | * handle that instead. | ||||
*/ | */ | ||||
if (!STAILQ_EMPTY(&qpair->queued_req) && | if (!STAILQ_EMPTY(&qpair->queued_req) && | ||||
!qpair->ctrlr->is_resetting) { | !qpair->ctrlr->is_resetting) { | ||||
req = STAILQ_FIRST(&qpair->queued_req); | req = STAILQ_FIRST(&qpair->queued_req); | ||||
STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq); | STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq); | ||||
_nvme_qpair_submit_request(qpair, req); | _nvme_qpair_submit_request(qpair, req); | ||||
} else if (TAILQ_EMPTY(&qpair->outstanding_tr)) { | |||||
qpair->deadline = SBT_MAX; | |||||
} | } | ||||
} | } | ||||
mtx_unlock(&qpair->lock); | mtx_unlock(&qpair->lock); | ||||
} | } | ||||
static void | static void | ||||
nvme_qpair_manual_complete_tracker( | nvme_qpair_manual_complete_tracker( | ||||
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | nvme_qpair_process_completions(struct nvme_qpair *qpair) | ||||
qpair->num_intr_handler_calls++; | qpair->num_intr_handler_calls++; | ||||
/* | /* | ||||
* qpair is not enabled, likely because a controller reset is in | * qpair is not enabled, likely because a controller reset is in | ||||
* progress. Ignore the interrupt - any I/O that was associated with | * progress. Ignore the interrupt - any I/O that was associated with | ||||
* this interrupt will get retried when the reset is complete. | * this interrupt will get retried when the reset is complete. | ||||
*/ | */ | ||||
if (!qpair->is_enabled) | if (qpair->recovery_state != RECOVERY_NONE) | ||||
return (false); | return (false); | ||||
/* | /* | ||||
* A panic can stop the CPU this routine is running on at any point. If | * A panic can stop the CPU this routine is running on at any point. If | ||||
* we're called during a panic, complete the sq_head wrap protocol for | * we're called during a panic, complete the sq_head wrap protocol for | ||||
* the case where we are interrupted just after the increment at 1 | * the case where we are interrupted just after the increment at 1 | ||||
* below, but before we can reset cq_head to zero at 2. Also cope with | * below, but before we can reset cq_head to zero at 2. Also cope with | ||||
* the case where we do the zero at 2, but may or may not have done the | * the case where we do the zero at 2, but may or may not have done the | ||||
▲ Show 20 Lines • Show All 180 Lines • ▼ Show 20 Lines | nvme_qpair_construct(struct nvme_qpair *qpair, | ||||
qpair->num_failures = 0; | qpair->num_failures = 0; | ||||
qpair->cmd = (struct nvme_command *)queuemem; | qpair->cmd = (struct nvme_command *)queuemem; | ||||
qpair->cpl = (struct nvme_completion *)(queuemem + cmdsz); | qpair->cpl = (struct nvme_completion *)(queuemem + cmdsz); | ||||
prpmem = (uint8_t *)(queuemem + cmdsz + cplsz); | prpmem = (uint8_t *)(queuemem + cmdsz + cplsz); | ||||
qpair->cmd_bus_addr = queuemem_phys; | qpair->cmd_bus_addr = queuemem_phys; | ||||
qpair->cpl_bus_addr = queuemem_phys + cmdsz; | qpair->cpl_bus_addr = queuemem_phys + cmdsz; | ||||
prpmem_phys = queuemem_phys + cmdsz + cplsz; | prpmem_phys = queuemem_phys + cmdsz + cplsz; | ||||
callout_init(&qpair->timer, 1); | |||||
qpair->timer_armed = false; | |||||
qpair->deadline = SBT_MAX; | |||||
qpair->recovery_state = RECOVERY_NONE; | |||||
/* | /* | ||||
* Calcuate the stride of the doorbell register. Many emulators set this | * Calcuate the stride of the doorbell register. Many emulators set this | ||||
* value to correspond to a cache line. However, some hardware has set | * value to correspond to a cache line. However, some hardware has set | ||||
* it to various small values. | * it to various small values. | ||||
*/ | */ | ||||
qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[0]) + | qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[0]) + | ||||
(qpair->id << (ctrlr->dstrd + 1)); | (qpair->id << (ctrlr->dstrd + 1)); | ||||
qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[0]) + | qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[0]) + | ||||
Show All 21 Lines | if (trunc_page(list_phys) != | ||||
prp_list = | prp_list = | ||||
(uint8_t *)roundup2((uintptr_t)prp_list, PAGE_SIZE); | (uint8_t *)roundup2((uintptr_t)prp_list, PAGE_SIZE); | ||||
} | } | ||||
tr = malloc_domainset(sizeof(*tr), M_NVME, | tr = malloc_domainset(sizeof(*tr), M_NVME, | ||||
DOMAINSET_PREF(qpair->domain), M_ZERO | M_WAITOK); | DOMAINSET_PREF(qpair->domain), M_ZERO | M_WAITOK); | ||||
bus_dmamap_create(qpair->dma_tag_payload, 0, | bus_dmamap_create(qpair->dma_tag_payload, 0, | ||||
&tr->payload_dma_map); | &tr->payload_dma_map); | ||||
callout_init(&tr->timer, 1); | |||||
tr->cid = i; | tr->cid = i; | ||||
tr->qpair = qpair; | tr->qpair = qpair; | ||||
tr->prp = (uint64_t *)prp_list; | tr->prp = (uint64_t *)prp_list; | ||||
tr->prp_bus_addr = list_phys; | tr->prp_bus_addr = list_phys; | ||||
TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq); | TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq); | ||||
list_phys += prpsz; | list_phys += prpsz; | ||||
prp_list += prpsz; | prp_list += prpsz; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 91 Lines • ▼ Show 20 Lines | |||||
/*
 * Tear down an I/O queue pair.  Thin public wrapper around the common
 * qpair destruction path shared with the admin queue.
 */
void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{

	nvme_qpair_destroy(qpair);
}
#if 0 | |||||
static void | static void | ||||
nvme_abort_complete(void *arg, const struct nvme_completion *status) | nvme_abort_complete(void *arg, const struct nvme_completion *status) | ||||
{ | { | ||||
struct nvme_tracker *tr = arg; | struct nvme_tracker *tr = arg; | ||||
/* | /* | ||||
* If cdw0 == 1, the controller was not able to abort the command | * If cdw0 == 1, the controller was not able to abort the command | ||||
* we requested. We still need to check the active tracker array, | * we requested. We still need to check the active tracker array, | ||||
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id, | ||||
nvme_abort_complete, tr); | nvme_abort_complete, tr); | ||||
} else { | } else { | ||||
nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n", | nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n", | ||||
(csts == 0xffffffff) ? " and possible hot unplug" : | (csts == 0xffffffff) ? " and possible hot unplug" : | ||||
(cfs ? " and fatal error status" : "")); | (cfs ? " and fatal error status" : "")); | ||||
nvme_ctrlr_reset(ctrlr); | nvme_ctrlr_reset(ctrlr); | ||||
} | } | ||||
} | } | ||||
#else | |||||
static void | |||||
nvme_qpair_timeout(void *arg) | |||||
{ | |||||
struct nvme_qpair *qpair = arg; | |||||
struct nvme_controller *ctrlr = qpair->ctrlr; | |||||
struct nvme_tracker *tr; | |||||
struct nvme_tracker *tr_temp; | |||||
sbintime_t now; | |||||
bool idle; | |||||
uint32_t csts; | |||||
uint8_t cfs; | |||||
mtx_lock(&qpair->lock); | |||||
idle = TAILQ_EMPTY(&qpair->outstanding_tr); | |||||
if (idle && qpair->deadline != SBT_MAX) { | |||||
nvme_printf(ctrlr, "Had to reset deadline from %jd to MAX due to idle\n", | |||||
rpokala: The first operation will set deadline to non-`SBT_MAX`, and then `outstanding_tr` would become… | |||||
qpair->deadline); | |||||
qpair->deadline = SBT_MAX; | |||||
} | |||||
again: | |||||
switch (qpair->recovery_state) { | |||||
case RECOVERY_NONE: | |||||
now = getsbinuptime(); | |||||
if (!idle && now > qpair->deadline) { | |||||
qpair->deadline = SBT_MAX; | |||||
TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) { | |||||
if (now > tr->deadline) { | |||||
/* | |||||
* We're now passed our earliest deadline. We need to do | |||||
* expensive things to cope, but next time. Flag that and | |||||
* close the door to any further processing. | |||||
*/ | |||||
qpair->recovery_state = RECOVERY_START; | |||||
nvme_printf(ctrlr, "RECOVERY_START %jd vs %jd %jd\n", | |||||
(uintmax_t)now, (uintmax_t)qpair->deadline, | |||||
(uintmax_t)tr->deadline); | |||||
qpair->deadline = SBT_MAX; | |||||
break; | |||||
} | |||||
if (qpair->deadline > tr->deadline) { | |||||
nvme_printf(ctrlr, "Adjusting: %jd to %jd\n", | |||||
(uintmax_t)qpair->deadline, | |||||
(uintmax_t)tr->deadline); | |||||
qpair->deadline = tr->deadline; | |||||
} | |||||
} | |||||
} | |||||
break; | |||||
case RECOVERY_START: | |||||
/* | |||||
* Read csts to get value of cfs - controller fatal status. | |||||
* If no fatal status, try to call the completion routine, and | |||||
* if completes transactions, report a missed interrupt and | |||||
* return (this may need to be rate limited). Otherwise, if | |||||
* aborts are enabled and the controller is not reporting | |||||
* fatal status, abort the command. Otherwise, just reset the | |||||
* controller and hope for the best. | |||||
*/ | |||||
csts = nvme_mmio_read_4(ctrlr, csts); | |||||
cfs = (csts >> NVME_CSTS_REG_CFS_SHIFT) & NVME_CSTS_REG_CFS_MASK; | |||||
if (cfs) { | |||||
nvme_printf(ctrlr, "Controller in fatal status, resetting\n"); | |||||
qpair->recovery_state = RECOVERY_RESET; | |||||
goto again; | |||||
} | |||||
mtx_unlock(&qpair->lock); | |||||
if (nvme_qpair_process_completions(qpair)) { | |||||
nvme_printf(ctrlr, "Missing interrupt\n"); | |||||
qpair->recovery_state = RECOVERY_NONE; | |||||
// XXX do I need a NOP here to not race? | |||||
} else { | |||||
nvme_printf(ctrlr, "missed interrupt with nothing complete\n"); | |||||
qpair->recovery_state = RECOVERY_RESET; | |||||
mtx_lock(&qpair->lock); | |||||
goto again; | |||||
} | |||||
mtx_lock(&qpair->lock); | |||||
break; | |||||
case RECOVERY_RESET: | |||||
nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n", | |||||
(csts == 0xffffffff) ? " and possible hot unplug" : | |||||
(cfs ? " and fatal error status" : "")); | |||||
nvme_printf(ctrlr, "RECOVERY_WAITING\n"); | |||||
qpair->recovery_state = RECOVERY_WAITING; | |||||
nvme_ctrlr_reset(ctrlr); | |||||
break; | |||||
case RECOVERY_WAITING: | |||||
nvme_printf(ctrlr, "waiting\n"); | |||||
break; | |||||
} | |||||
/* | |||||
* Rearm the timeout. | |||||
*/ | |||||
if (!idle) { | |||||
callout_reset_on(&qpair->timer, qpair->ticks, | |||||
nvme_qpair_timeout, qpair, qpair->cpu); | |||||
} else { | |||||
qpair->timer_armed = false; | |||||
qpair->deadline = SBT_MAX; | |||||
} | |||||
mtx_unlock(&qpair->lock); | |||||
} | |||||
#endif | |||||
/* | |||||
* Submit the tracker to the hardware. Must already be in the | |||||
* outstanding queue when called. | |||||
*/ | |||||
void | void | ||||
nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr) | nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr) | ||||
{ | { | ||||
struct nvme_request *req; | struct nvme_request *req; | ||||
struct nvme_controller *ctrlr; | struct nvme_controller *ctrlr; | ||||
int timeout; | int timeout; | ||||
mtx_assert(&qpair->lock, MA_OWNED); | mtx_assert(&qpair->lock, MA_OWNED); | ||||
req = tr->req; | req = tr->req; | ||||
req->cmd.cid = tr->cid; | req->cmd.cid = tr->cid; | ||||
qpair->act_tr[tr->cid] = tr; | qpair->act_tr[tr->cid] = tr; | ||||
ctrlr = qpair->ctrlr; | ctrlr = qpair->ctrlr; | ||||
if (req->timeout) { | if (req->timeout) { | ||||
if (req->cb_fn == nvme_completion_poll_cb) | if (req->cb_fn == nvme_completion_poll_cb) | ||||
timeout = hz; | timeout = 1; | ||||
else | else | ||||
timeout = ctrlr->timeout_period * hz; | timeout = ctrlr->timeout_period; | ||||
callout_reset_on(&tr->timer, timeout, nvme_timeout, tr, | tr->deadline = getsbinuptime() + timeout * SBT_1S; | ||||
qpair->cpu); | if (!qpair->timer_armed) { | ||||
qpair->ticks = hz / 2; | |||||
qpair->timer_armed = true; | |||||
callout_reset_on(&qpair->timer, qpair->ticks, | |||||
nvme_qpair_timeout, qpair, qpair->cpu); | |||||
} | } | ||||
} | |||||
qpair->deadline = TAILQ_FIRST(&qpair->outstanding_tr)->deadline; | |||||
/* Copy the command from the tracker to the submission queue. */ | /* Copy the command from the tracker to the submission queue. */ | ||||
memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd)); | memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd)); | ||||
if (++qpair->sq_tail == qpair->num_entries) | if (++qpair->sq_tail == qpair->num_entries) | ||||
qpair->sq_tail = 0; | qpair->sq_tail = 0; | ||||
bus_dmamap_sync(qpair->dma_tag, qpair->queuemem_map, | bus_dmamap_sync(qpair->dma_tag, qpair->queuemem_map, | ||||
▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines | _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req) | ||||
struct nvme_tracker *tr; | struct nvme_tracker *tr; | ||||
int err = 0; | int err = 0; | ||||
mtx_assert(&qpair->lock, MA_OWNED); | mtx_assert(&qpair->lock, MA_OWNED); | ||||
tr = TAILQ_FIRST(&qpair->free_tr); | tr = TAILQ_FIRST(&qpair->free_tr); | ||||
req->qpair = qpair; | req->qpair = qpair; | ||||
if (tr == NULL || !qpair->is_enabled) { | if (tr == NULL || qpair->recovery_state != RECOVERY_NONE) { | ||||
/* | /* | ||||
* No tracker is available, or the qpair is disabled due to | * No tracker is available, or the qpair is disabled due to | ||||
* an in-progress controller-level reset or controller | * an in-progress controller-level reset or controller | ||||
* failure. | * failure. | ||||
*/ | */ | ||||
if (qpair->ctrlr->is_failed) { | if (qpair->ctrlr->is_failed) { | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines | nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req) | ||||
mtx_lock(&qpair->lock); | mtx_lock(&qpair->lock); | ||||
_nvme_qpair_submit_request(qpair, req); | _nvme_qpair_submit_request(qpair, req); | ||||
mtx_unlock(&qpair->lock); | mtx_unlock(&qpair->lock); | ||||
} | } | ||||
static void | static void | ||||
nvme_qpair_enable(struct nvme_qpair *qpair) | nvme_qpair_enable(struct nvme_qpair *qpair) | ||||
{ | { | ||||
mtx_assert(&qpair->lock, MA_OWNED); | |||||
qpair->is_enabled = true; | qpair->recovery_state = RECOVERY_NONE; | ||||
} | } | ||||
void | void | ||||
nvme_qpair_reset(struct nvme_qpair *qpair) | nvme_qpair_reset(struct nvme_qpair *qpair) | ||||
{ | { | ||||
qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0; | qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0; | ||||
Show All 26 Lines | nvme_admin_qpair_enable(struct nvme_qpair *qpair) | ||||
*/ | */ | ||||
TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) { | TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) { | ||||
nvme_printf(qpair->ctrlr, | nvme_printf(qpair->ctrlr, | ||||
"aborting outstanding admin command\n"); | "aborting outstanding admin command\n"); | ||||
nvme_qpair_manual_complete_tracker(tr, NVME_SCT_GENERIC, | nvme_qpair_manual_complete_tracker(tr, NVME_SCT_GENERIC, | ||||
NVME_SC_ABORTED_BY_REQUEST, DO_NOT_RETRY, ERROR_PRINT_ALL); | NVME_SC_ABORTED_BY_REQUEST, DO_NOT_RETRY, ERROR_PRINT_ALL); | ||||
} | } | ||||
mtx_lock(&qpair->lock); | |||||
nvme_qpair_enable(qpair); | nvme_qpair_enable(qpair); | ||||
mtx_unlock(&qpair->lock); | |||||
} | } | ||||
void | void | ||||
nvme_io_qpair_enable(struct nvme_qpair *qpair) | nvme_io_qpair_enable(struct nvme_qpair *qpair) | ||||
{ | { | ||||
STAILQ_HEAD(, nvme_request) temp; | STAILQ_HEAD(, nvme_request) temp; | ||||
struct nvme_tracker *tr; | struct nvme_tracker *tr; | ||||
struct nvme_tracker *tr_temp; | struct nvme_tracker *tr_temp; | ||||
Show All 26 Lines | nvme_io_qpair_enable(struct nvme_qpair *qpair) | ||||
} | } | ||||
mtx_unlock(&qpair->lock); | mtx_unlock(&qpair->lock); | ||||
} | } | ||||
static void | static void | ||||
nvme_qpair_disable(struct nvme_qpair *qpair) | nvme_qpair_disable(struct nvme_qpair *qpair) | ||||
{ | { | ||||
struct nvme_tracker *tr; | |||||
qpair->is_enabled = false; | |||||
mtx_lock(&qpair->lock); | mtx_lock(&qpair->lock); | ||||
TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) | qpair->recovery_state = RECOVERY_WAITING; | ||||
callout_stop(&tr->timer); | |||||
mtx_unlock(&qpair->lock); | mtx_unlock(&qpair->lock); | ||||
} | } | ||||
void | void | ||||
nvme_admin_qpair_disable(struct nvme_qpair *qpair) | nvme_admin_qpair_disable(struct nvme_qpair *qpair) | ||||
{ | { | ||||
nvme_qpair_disable(qpair); | nvme_qpair_disable(qpair); | ||||
▲ Show 20 Lines • Show All 47 Lines • Show Last 20 Lines |
The first operation will set deadline to non-SBT_MAX, and then outstanding_tr would become empty when all in-flight operations complete, right? Wouldn't that mean that this would get printed all the time on an idle system?