bhyve NVMe emulation: serialize queue processing with per-queue mutexes.

Summary of what this patch does:

  The "busy" flag in struct nvme_submission_queue is removed and a
  pthread mutex is added to it; the admin and I/O submission handlers
  now lock that mutex for the duration of queue processing instead of
  returning early when the flag was already set.

  A new pci_nvme_init_queues() allocates the submission and completion
  queue arrays (requested count + 1 entries each) and initializes the
  mutex of every allocated entry.  pci_nvme_reset_locked() no longer
  allocates anything: it asserts that both arrays exist and clears
  every entry.

  pci_nvme_set_completion() loses its ignore_busy parameter and raises
  the MSI-X interrupt while the completion queue mutex is held.

  The doorbell handler rejects submission queue indexes greater than
  num_squeues.

NOTE(review): the leading whitespace of the lines below was reconstructed
from a whitespace-collapsed copy of this patch.  If a hunk fails to apply,
retry with "patch -l" or regenerate the diff against the tree.

Index: usr.sbin/bhyve/pci_nvme.c
===================================================================
--- usr.sbin/bhyve/pci_nvme.c
+++ usr.sbin/bhyve/pci_nvme.c
@@ -135,21 +135,21 @@
 
 struct nvme_completion_queue {
 	struct nvme_completion *qbase;
+	pthread_mutex_t	mtx;
 	uint32_t	size;
 	uint16_t	tail; /* nvme progress */
 	uint16_t	head; /* guest progress */
 	uint16_t	intr_vec;
 	uint32_t	intr_en;
-	pthread_mutex_t	mtx;
 };
 
 struct nvme_submission_queue {
 	struct nvme_command *qbase;
+	pthread_mutex_t	mtx;
 	uint32_t	size;
 	uint16_t	head; /* nvme progress */
 	uint16_t	tail; /* guest progress */
 	uint16_t	cqid; /* completion queue id */
-	int		busy; /* queue is being processed */
 	int		qpriority;
 };
 
@@ -305,6 +305,62 @@
 		*status |= NVME_STATUS_P;
 }
 
+/*
+ * Initialize the requested number of IO Submission and Completion Queues.
+ * Admin queues are allocated implicitly.
+ */
+static void
+pci_nvme_init_queues(struct pci_nvme_softc *sc, uint32_t nsq, uint32_t ncq)
+{
+	uint32_t i;
+
+	/*
+	 * Allocate and initialize the Submission Queues
+	 */
+	if (nsq > NVME_QUEUES) {
+		WPRINTF(("%s: clamping number of SQ from %u to %u\r\n",
+		    __func__, nsq, NVME_QUEUES));
+		nsq = NVME_QUEUES;
+	}
+
+	sc->num_squeues = nsq;
+
+	sc->submit_queues = calloc(sc->num_squeues + 1,
+	    sizeof(struct nvme_submission_queue));
+	if (sc->submit_queues == NULL) {
+		WPRINTF(("%s: SQ allocation failed\r\n", __func__));
+		sc->num_squeues = 0;
+	} else {
+		struct nvme_submission_queue *sq = sc->submit_queues;
+
+		for (i = 0; i < sc->num_squeues + 1; i++)
+			pthread_mutex_init(&sq[i].mtx, NULL);
+	}
+
+	/*
+	 * Allocate and initialize the Completion Queues
+	 */
+	if (ncq > NVME_QUEUES) {
+		WPRINTF(("%s: clamping number of CQ from %u to %u\r\n",
+		    __func__, ncq, NVME_QUEUES));
+		ncq = NVME_QUEUES;
+	}
+
+	sc->num_cqueues = ncq;
+
+	sc->compl_queues = calloc(sc->num_cqueues + 1,
+	    sizeof(struct nvme_completion_queue));
+	if (sc->compl_queues == NULL) {
+		WPRINTF(("%s: CQ allocation failed\r\n", __func__));
+		sc->num_cqueues = 0;
+	} else {
+		struct nvme_completion_queue *cq = sc->compl_queues;
+
+		for (i = 0; i < sc->num_cqueues + 1; i++)
+			pthread_mutex_init(&cq[i].mtx, NULL);
+	}
+}
+
 static void
 pci_nvme_init_ctrldata(struct pci_nvme_softc *sc)
 {
@@ -383,6 +439,8 @@
 static void
 pci_nvme_reset_locked(struct pci_nvme_softc *sc)
 {
+	uint32_t i;
+
 	DPRINTF(("%s\r\n", __func__));
 
 	sc->regs.cap_lo = (ZERO_BASED(sc->max_qentries) & NVME_CAP_LO_REG_MQES_MASK) |
@@ -396,44 +454,23 @@
 	sc->regs.cc = 0;
 	sc->regs.csts = 0;
 
-	sc->num_cqueues = sc->num_squeues = sc->max_queues;
-	if (sc->submit_queues != NULL) {
-		for (int i = 0; i < sc->num_squeues + 1; i++) {
-			/*
-			 * The Admin Submission Queue is at index 0.
-			 * It must not be changed at reset otherwise the
-			 * emulation will be out of sync with the guest.
-			 */
-			if (i != 0) {
-				sc->submit_queues[i].qbase = NULL;
-				sc->submit_queues[i].size = 0;
-				sc->submit_queues[i].cqid = 0;
-			}
-			sc->submit_queues[i].tail = 0;
-			sc->submit_queues[i].head = 0;
-			sc->submit_queues[i].busy = 0;
-		}
-	} else
-		sc->submit_queues = calloc(sc->num_squeues + 1,
-		                        sizeof(struct nvme_submission_queue));
+	assert(sc->submit_queues != NULL);
 
-	if (sc->compl_queues != NULL) {
-		for (int i = 0; i < sc->num_cqueues + 1; i++) {
-			/* See Admin Submission Queue note above */
-			if (i != 0) {
-				sc->compl_queues[i].qbase = NULL;
-				sc->compl_queues[i].size = 0;
-			}
+	for (i = 0; i < sc->num_squeues + 1; i++) {
+		sc->submit_queues[i].qbase = NULL;
+		sc->submit_queues[i].size = 0;
+		sc->submit_queues[i].cqid = 0;
+		sc->submit_queues[i].tail = 0;
+		sc->submit_queues[i].head = 0;
+	}
 
-			sc->compl_queues[i].tail = 0;
-			sc->compl_queues[i].head = 0;
-		}
-	} else {
-		sc->compl_queues = calloc(sc->num_cqueues + 1,
-		                        sizeof(struct nvme_completion_queue));
+	assert(sc->compl_queues != NULL);
 
-		for (int i = 0; i < sc->num_cqueues + 1; i++)
-			pthread_mutex_init(&sc->compl_queues[i].mtx, NULL);
+	for (i = 0; i < sc->num_cqueues + 1; i++) {
+		sc->compl_queues[i].qbase = NULL;
+		sc->compl_queues[i].size = 0;
+		sc->compl_queues[i].tail = 0;
+		sc->compl_queues[i].head = 0;
 	}
 }
 
@@ -966,13 +1003,9 @@
 
 	sq = &sc->submit_queues[0];
 
-	sqhead = atomic_load_acq_short(&sq->head);
+	pthread_mutex_lock(&sq->mtx);
 
-	if (atomic_testandset_int(&sq->busy, 1)) {
-		DPRINTF(("%s SQ busy, head %u, tail %u\r\n",
-		        __func__, sqhead, sq->tail));
-		return;
-	}
+	sqhead = sq->head;
 
 	DPRINTF(("sqhead %u, tail %u\r\n", sqhead, sq->tail));
 
@@ -1035,6 +1068,8 @@
 
 			cq = &sc->compl_queues[0];
 
+			pthread_mutex_lock(&cq->mtx);
+
 			cp = &(cq->qbase)[cq->tail];
 			cp->cdw0 = compl.cdw0;
 			cp->sqid = 0;
@@ -1046,17 +1081,19 @@
 			pci_nvme_toggle_phase(&cp->status, phase);
 
 			cq->tail = (cq->tail + 1) % cq->size;
+
+			pthread_mutex_unlock(&cq->mtx);
 		}
 		sqhead = (sqhead + 1) % sq->size;
 	}
 
 	DPRINTF(("setting sqhead %u\r\n", sqhead));
-	atomic_store_short(&sq->head, sqhead);
-	atomic_store_int(&sq->busy, 0);
+	sq->head = sqhead;
 
 	if (do_intr)
 		pci_generate_msix(sc->nsc_pi, 0);
 
+	pthread_mutex_unlock(&sq->mtx);
 }
 
 static int
@@ -1146,11 +1183,10 @@
 static void
 pci_nvme_set_completion(struct pci_nvme_softc *sc,
 	struct nvme_submission_queue *sq, int sqid, uint16_t cid,
-	uint32_t cdw0, uint16_t status, int ignore_busy)
+	uint32_t cdw0, uint16_t status)
 {
 	struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid];
 	struct nvme_completion *compl;
-	int do_intr = 0;
 	int phase;
 
 	DPRINTF(("%s sqid %d cqid %u cid %u status: 0x%x 0x%x\r\n",
@@ -1163,7 +1199,7 @@
 
 	compl = &cq->qbase[cq->tail];
 
-	compl->sqhd = atomic_load_acq_short(&sq->head);
+	compl->sqhd = sq->head;
 	compl->sqid = sqid;
 	compl->cid = cid;
 
@@ -1175,13 +1211,9 @@
 	cq->tail = (cq->tail + 1) % cq->size;
 
 	if (cq->intr_en & NVME_CQ_INTEN)
-		do_intr = 1;
+		pci_generate_msix(sc->nsc_pi, cq->intr_vec);
 
 	pthread_mutex_unlock(&cq->mtx);
-
-	if (ignore_busy || !atomic_load_acq_int(&sq->busy))
-		if (do_intr)
-			pci_generate_msix(sc->nsc_pi, cq->intr_vec);
 }
 
 static void
@@ -1250,7 +1282,7 @@
 	code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS;
 	pci_nvme_status_genc(&status, code);
 
-	pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, 0, status, 0);
+	pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, 0, status);
 	pci_nvme_release_ioreq(req->sc, req);
 }
 
@@ -1276,12 +1308,9 @@
 	/* handle all submissions up to sq->tail index */
 	sq = &sc->submit_queues[idx];
 
-	if (atomic_testandset_int(&sq->busy, 1)) {
-		DPRINTF(("%s sqid %u busy\r\n", __func__, idx));
-		return;
-	}
+	pthread_mutex_lock(&sq->mtx);
 
-	sqhead = atomic_load_acq_short(&sq->head);
+	sqhead = sq->head;
 	DPRINTF(("nvme_handle_io qid %u head %u tail %u cmdlist %p\r\n",
 	         idx, sqhead, sq->tail, sq->qbase));
 
@@ -1302,7 +1331,7 @@
 		if (cmd->opc == NVME_OPC_FLUSH) {
 			pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
 			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
-			                        status, 1);
+			                        status);
 
 			continue;
 		} else if (cmd->opc == 0x08) {
@@ -1311,7 +1340,7 @@
 			         __func__, lba, cmd->cdw12 & 0xFFFF));
 			pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
 			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
-			                        status, 1);
+			                        status);
 
 			continue;
 		}
@@ -1425,7 +1454,7 @@
 
 				pci_nvme_status_genc(&status, code);
 				pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
-				                        status, 1);
+				                        status);
 
 				continue;
 			}
@@ -1458,13 +1487,14 @@
 			    NVME_SC_DATA_TRANSFER_ERROR);
 
 			pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
-			                        status, 1);
+			                        status);
 			pci_nvme_release_ioreq(sc, req);
 		}
 	}
 
-	atomic_store_short(&sq->head, sqhead);
-	atomic_store_int(&sq->busy, 0);
+	sq->head = sqhead;
+
+	pthread_mutex_unlock(&sq->mtx);
 }
 
 static void
@@ -1475,6 +1505,13 @@
 	         idx, is_sq ? "SQ" : "CQ", value & 0xFFFF));
 
 	if (is_sq) {
+		if (idx > sc->num_squeues) {
+			WPRINTF(("%s queue index %lu overflow from "
+			         "guest (max %u)\r\n",
+			         __func__, idx, sc->num_squeues));
+			return;
+		}
+
 		atomic_store_short(&sc->submit_queues[idx].tail,
 		                   (uint16_t)value);
 
@@ -1498,7 +1535,8 @@
 			return;
 		}
 
-		sc->compl_queues[idx].head = (uint16_t)value;
+		atomic_store_short(&sc->compl_queues[idx].head,
+		    (uint16_t)value);
 	}
 }
 
@@ -1928,11 +1966,13 @@
 	pthread_mutex_init(&sc->mtx, NULL);
 	sem_init(&sc->iosemlock, 0, sc->ioslots);
 
-	pci_nvme_reset(sc);
+	pci_nvme_init_queues(sc, sc->max_queues, sc->max_queues);
 	pci_nvme_init_ctrldata(sc);
 	pci_nvme_init_nsdata(sc);
 	pci_nvme_init_logpages(sc);
 
+	pci_nvme_reset(sc);
+
 	pci_lintr_request(pi);
 
 done: