Index: usr.sbin/bhyve/pci_nvme.c
===================================================================
--- usr.sbin/bhyve/pci_nvme.c
+++ usr.sbin/bhyve/pci_nvme.c
@@ -180,6 +180,7 @@
 	uint32_t	sectsz;
 	uint32_t	sectsz_bits;
 	uint64_t	eui64;
+	uint32_t	deallocate:1;
 };
 
 struct pci_nvme_ioreq {
@@ -362,7 +363,7 @@
 	cd->cqes = (4 << NVME_CTRLR_DATA_CQES_MAX_SHIFT) |
 	    (4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT);
 	cd->nn = 1;	/* number of namespaces */
-
+	cd->oncs = (NVME_CTRLR_DATA_ONCS_DSM_MASK << NVME_CTRLR_DATA_ONCS_DSM_SHIFT);
 	cd->fna = 0x03;
 
 	cd->power_state[0].mp = 10;
@@ -429,6 +430,9 @@
 	nd->ncap = nd->nsze;
 	nd->nuse = nd->nsze;
 
+	if (nvstore->type == NVME_STOR_BLOCKIF)
+		nvstore->deallocate = blockif_candelete(nvstore->ctx);
+
 	nd->nlbaf = 0; /* NLBAF is a 0's based value (i.e. 1 LBA Format) */
 	nd->flbas = 0;
 
@@ -1340,7 +1344,7 @@
 	uint16_t code, status;
 
 	DPRINTF(("%s error %d %s", __func__, err, strerror(err)));
-	
+
 	/* TODO return correct error */
 	code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS;
 	pci_nvme_status_genc(&status, code);
@@ -1359,6 +1363,120 @@
 	pthread_cond_signal(&req->cv);
 }
 
+static void
+pci_nvme_dealloc_sm(struct blockif_req *br, int err)
+{
+	struct pci_nvme_ioreq *req = br->br_param;
+	struct pci_nvme_softc *sc = req->sc;
+	bool done = true;
+	uint16_t status;
+
+	if (err) {
+		pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR);
+	} else if ((req->prev_gpaddr + 1) == (req->prev_size)) {
+		pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
+	} else {
+		struct iovec *iov = req->io_req.br_iov;
+
+		req->prev_gpaddr++;
+		iov += req->prev_gpaddr;
+
+		/* The iov_* values already include the sector size */
+		req->io_req.br_offset = (off_t)iov->iov_base;
+		req->io_req.br_resid = iov->iov_len;
+		if (blockif_delete(sc->nvstore.ctx, &req->io_req)) {
+			pci_nvme_status_genc(&status,
+			    NVME_SC_INTERNAL_DEVICE_ERROR);
+		} else
+			done = false;
+	}
+
+	if (done) {
+		pci_nvme_set_completion(sc, req->nvme_sq, req->sqid,
+		    req->cid, 0, status, 0);
+		pci_nvme_release_ioreq(sc, req);
+	}
+}
+
+static int
+nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
+    struct nvme_command *cmd,
+    struct pci_nvme_blockstore *nvstore,
+    struct pci_nvme_ioreq *req,
+    uint16_t *status)
+{
+	int err = -1;
+
+	if (cmd->cdw11 & NVME_DSM_ATTR_DEALLOCATE) {
+		struct nvme_dsm_range *range;
+		uint32_t nr, r;
+		int sectsz = blockif_sectsz(nvstore->ctx);
+
+		/* Return error if backing store doesn't support TRIM */
+		if (!nvstore->deallocate) {
+			pci_nvme_status_genc(status, NVME_SC_INVALID_FIELD);
+			*status |= NVME_STATUS_DNR_MASK << NVME_STATUS_DNR_SHIFT;
+			goto out;
+		}
+
+		if (req == NULL) {
+			pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+			goto out;
+		}
+
+		/* copy locally because a range entry could straddle PRPs */
+		range = calloc(1, NVME_MAX_DSM_TRIM);
+		if (range == NULL) {
+			pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+			goto out;
+		}
+		nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
+		    (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP);
+
+		req->opc = cmd->opc;
+		req->cid = cmd->cid;
+		req->nsid = cmd->nsid;
+		/*
+		 * If the request is for more than a single range, store
+		 * the ranges in the br_iov. Optimize for the common case
+		 * of a single range.
+		 *
+		 * Note that NVMe Number of Ranges is a zero based value
+		 */
+		nr = cmd->cdw10 & 0xff;
+
+		req->io_req.br_iovcnt = 0;
+		req->io_req.br_offset = range[0].starting_lba * sectsz;
+		req->io_req.br_resid = range[0].length * sectsz;
+
+		if (nr == 0) {
+			req->io_req.br_callback = pci_nvme_io_done;
+		} else {
+			struct iovec *iov = req->io_req.br_iov;
+
+			for (r = 0; r <= nr; r++) {
+				iov[r].iov_base = (void *)(range[r].starting_lba * sectsz);
+				iov[r].iov_len = range[r].length * sectsz;
+			}
+			req->io_req.br_callback = pci_nvme_dealloc_sm;
+
+			/*
+			 * Use prev_gpaddr to track the current entry and
+			 * prev_size to track the number of entries
+			 */
+			req->prev_gpaddr = 0;
+			req->prev_size = r;
+		}
+
+		err = blockif_delete(nvstore->ctx, &req->io_req);
+		if (err)
+			pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+
+		free(range);
+	}
+out:
+	return (err);
+}
+
 static void
 pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx)
 {
@@ -1411,16 +1529,26 @@
 			continue;
 		}
 
-		nblocks = (cmd->cdw12 & 0xFFFF) + 1;
-
-		bytes = nblocks * sc->nvstore.sectsz;
-
 		if (sc->nvstore.type == NVME_STOR_BLOCKIF) {
			req = pci_nvme_get_ioreq(sc);
			req->nvme_sq = sq;
			req->sqid = idx;
 		}
 
+		if (cmd->opc == NVME_OPC_DATASET_MANAGEMENT) {
+			if (nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, req,
+			    &status)) {
+				pci_nvme_set_completion(sc, sq, idx, cmd->cid,
+				    0, status, 1);
+				pci_nvme_release_ioreq(sc, req);
+			}
+			continue;
+		}
+
+		nblocks = (cmd->cdw12 & 0xFFFF) + 1;
+
+		bytes = nblocks * sc->nvstore.sectsz;
+
 		/*
 		 * If data starts mid-page and flows into the next page, then
 		 * increase page count
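For reference, the payload that nvme_opc_dataset_mgmt() copies out of guest memory with nvme_prp_memcpy() is an array of 16-byte Dataset Management range entries as defined by the NVMe specification. The sketch below is a minimal, self-contained illustration, not bhyve code: the struct mirrors the spec's range layout (and FreeBSD's struct nvme_dsm_range), and the names dsm_range and walk_dsm_ranges are hypothetical. It shows how each range's starting_lba/length pair is scaled by the backing store's sector size into the byte offset and length that the handler above passes to blockif_delete(), and how the zero-based Number of Ranges from CDW10[7:0] yields nr + 1 entries.

#include <stdint.h>
#include <stdio.h>

/* 16-byte DSM range entry per the NVMe spec (layout of struct nvme_dsm_range) */
struct dsm_range {
	uint32_t attributes;	/* context attributes; unused for deallocate */
	uint32_t length;	/* number of logical blocks */
	uint64_t starting_lba;	/* first logical block of the range */
};

/*
 * Hypothetical walk over the ranges of one DSM deallocate command,
 * converting LBA units to the byte offset/length a block backend expects.
 * "nr" is the zero-based Number of Ranges field, so the command always
 * carries nr + 1 entries.
 */
static void
walk_dsm_ranges(const struct dsm_range *range, uint32_t nr, int sectsz)
{
	for (uint32_t r = 0; r <= nr; r++) {
		uint64_t off = range[r].starting_lba * (uint64_t)sectsz;
		uint64_t len = range[r].length * (uint64_t)sectsz;

		/* A real backend would issue a delete/TRIM request here */
		printf("deallocate offset=%ju len=%ju\n",
		    (uintmax_t)off, (uintmax_t)len);
	}
}

int
main(void)
{
	struct dsm_range ranges[2] = {
		{ .attributes = 0, .length = 8,  .starting_lba = 0 },
		{ .attributes = 0, .length = 16, .starting_lba = 1024 },
	};

	/* Two ranges => the zero-based Number of Ranges field is 1 */
	walk_dsm_ranges(ranges, 1, 512);
	return (0);
}

The patch takes a deferred rather than synchronous approach to the same iteration: when more than one range is present, the scaled offset/length pairs are stashed in br_iov, and pci_nvme_dealloc_sm() re-issues blockif_delete() from the completion callback one range at a time, using prev_gpaddr as the current index and prev_size as the entry count.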