Changeset View
Standalone View
sys/dev/nvme/nvme_qpair.c
Show All 30 Lines | |||||
#include <sys/bus.h> | #include <sys/bus.h> | ||||
#include <dev/pci/pcivar.h> | #include <dev/pci/pcivar.h> | ||||
#include "nvme_private.h" | #include "nvme_private.h" | ||||
static void _nvme_qpair_submit_request(struct nvme_qpair *qpair, | static void _nvme_qpair_submit_request(struct nvme_qpair *qpair, | ||||
struct nvme_request *req); | struct nvme_request *req); | ||||
static void nvme_qpair_destroy(struct nvme_qpair *qpair); | |||||
struct nvme_opcode_string { | struct nvme_opcode_string { | ||||
uint16_t opc; | uint16_t opc; | ||||
const char * str; | const char * str; | ||||
}; | }; | ||||
static struct nvme_opcode_string admin_opcode[] = { | static struct nvme_opcode_string admin_opcode[] = { | ||||
▲ Show 20 Lines • Show All 238 Lines • ▼ Show 20 Lines | nvme_completion_is_retry(const struct nvme_completion *cpl) | ||||
case NVME_SCT_MEDIA_ERROR: | case NVME_SCT_MEDIA_ERROR: | ||||
case NVME_SCT_VENDOR_SPECIFIC: | case NVME_SCT_VENDOR_SPECIFIC: | ||||
default: | default: | ||||
return (0); | return (0); | ||||
} | } | ||||
} | } | ||||
static void | static void | ||||
nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr, | |||||
uint16_t cid) | |||||
{ | |||||
bus_dmamap_create(qpair->dma_tag_payload, 0, &tr->payload_dma_map); | |||||
bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map); | |||||
bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp, | |||||
sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0); | |||||
callout_init(&tr->timer, 1); | |||||
tr->cid = cid; | |||||
tr->qpair = qpair; | |||||
} | |||||
static void | |||||
nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr, | nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr, | ||||
struct nvme_completion *cpl, boolean_t print_on_error) | struct nvme_completion *cpl, boolean_t print_on_error) | ||||
{ | { | ||||
struct nvme_request *req; | struct nvme_request *req; | ||||
boolean_t retry, error; | boolean_t retry, error; | ||||
req = tr->req; | req = tr->req; | ||||
error = nvme_completion_is_error(cpl); | error = nvme_completion_is_error(cpl); | ||||
▲ Show 20 Lines • Show All 135 Lines • ▼ Show 20 Lines | |||||
/*
 * Per-queue-pair MSI-X interrupt handler: drain any completions that
 * are pending on this queue pair.
 */
static void
nvme_qpair_msix_handler(void *arg)
{
	struct nvme_qpair *qpair;

	qpair = arg;
	nvme_qpair_process_completions(qpair);
}
void | int | ||||
nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id, | nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id, | ||||
uint16_t vector, uint32_t num_entries, uint32_t num_trackers, | uint16_t vector, uint32_t num_entries, uint32_t num_trackers, | ||||
struct nvme_controller *ctrlr) | struct nvme_controller *ctrlr) | ||||
{ | { | ||||
struct nvme_tracker *tr; | struct nvme_tracker *tr; | ||||
uint32_t i; | size_t cmdsz, cplsz, prpsz, allocsz, prpmemsz; | ||||
int err; | uint64_t queuemem_phys, prpmem_phys, list_phys; | ||||
uint8_t *queuemem, *prpmem, *prp_list; | |||||
int i, err; | |||||
qpair->id = id; | qpair->id = id; | ||||
qpair->vector = vector; | qpair->vector = vector; | ||||
qpair->num_entries = num_entries; | qpair->num_entries = num_entries; | ||||
qpair->num_trackers = num_trackers; | qpair->num_trackers = num_trackers; | ||||
qpair->ctrlr = ctrlr; | qpair->ctrlr = ctrlr; | ||||
if (ctrlr->msix_enabled) { | if (ctrlr->msix_enabled) { | ||||
Show All 14 Lines | nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id, | ||||
mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF); | mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF); | ||||
/* Note: NVMe PRP format is restricted to 4-byte alignment. */ | /* Note: NVMe PRP format is restricted to 4-byte alignment. */ | ||||
err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), | err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), | ||||
4, PAGE_SIZE, BUS_SPACE_MAXADDR, | 4, PAGE_SIZE, BUS_SPACE_MAXADDR, | ||||
BUS_SPACE_MAXADDR, NULL, NULL, NVME_MAX_XFER_SIZE, | BUS_SPACE_MAXADDR, NULL, NULL, NVME_MAX_XFER_SIZE, | ||||
(NVME_MAX_XFER_SIZE/PAGE_SIZE)+1, PAGE_SIZE, 0, | (NVME_MAX_XFER_SIZE/PAGE_SIZE)+1, PAGE_SIZE, 0, | ||||
NULL, NULL, &qpair->dma_tag_payload); | NULL, NULL, &qpair->dma_tag_payload); | ||||
if (err != 0) | if (err != 0) { | ||||
nvme_printf(ctrlr, "payload tag create failed %d\n", err); | nvme_printf(ctrlr, "payload tag create failed %d\n", err); | ||||
goto out; | |||||
} | |||||
/* | |||||
* Each component must be page aligned, and individual PRP lists | |||||
* cannot cross a page boundary. | |||||
jimharris: Nit - prplistsz would be a more accurate variable name here. | |||||
Not Done Inline ActionsDecided to remove the variable, it wasn't necessary. scottl: Decided to remove the variable, it wasn't necessary. | |||||
*/ | |||||
cmdsz = qpair->num_entries * sizeof(struct nvme_command); | |||||
cmdsz = roundup2(cmdsz, PAGE_SIZE); | |||||
cplsz = qpair->num_entries * sizeof(struct nvme_completion); | |||||
cplsz = roundup2(cplsz, PAGE_SIZE); | |||||
prpsz = sizeof(uint64_t) * NVME_MAX_PRP_LIST_ENTRIES;; | |||||
prpmemsz = qpair->num_trackers * prpsz; | |||||
allocsz = cmdsz + cplsz + prpmemsz; | |||||
err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), | err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), | ||||
4, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, | PAGE_SIZE, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, | ||||
Done Inline Actions4 needs to change to PAGE_SIZE. See below. jimharris: 4 needs to change to PAGE_SIZE. See below. | |||||
BUS_SPACE_MAXSIZE, 1, BUS_SPACE_MAXSIZE, 0, | allocsz, 1, allocsz, 0, NULL, NULL, &qpair->dma_tag); | ||||
NULL, NULL, &qpair->dma_tag); | if (err != 0) { | ||||
if (err != 0) | |||||
nvme_printf(ctrlr, "tag create failed %d\n", err); | nvme_printf(ctrlr, "tag create failed %d\n", err); | ||||
goto out; | |||||
} | |||||
if (bus_dmamem_alloc(qpair->dma_tag, (void **)&queuemem, | |||||
BUS_DMA_NOWAIT, &qpair->queuemem_map)) { | |||||
nvme_printf(ctrlr, "failed to alloc qpair memory\n"); | |||||
goto out; | |||||
} | |||||
if (bus_dmamap_load(qpair->dma_tag, qpair->queuemem_map, | |||||
queuemem, allocsz, nvme_single_map, &queuemem_phys, 0) != 0) { | |||||
nvme_printf(ctrlr, "failed to load qpair memory\n"); | |||||
goto out; | |||||
} | |||||
qpair->num_cmds = 0; | qpair->num_cmds = 0; | ||||
qpair->num_intr_handler_calls = 0; | qpair->num_intr_handler_calls = 0; | ||||
qpair->cmd = (struct nvme_command *)queuemem; | |||||
qpair->cpl = (struct nvme_completion *)(queuemem + cmdsz); | |||||
Done Inline ActionsBoth cmd and cpl need to be page-aligned. Previously the contigmalloc call took care of this. Since these are allocated in one buffer now, you'll also need to assert that cmd is a PAGE_SIZE multiple. The default number of queue entries for admin and io pairs (128 and 256 respectively) will ensure the cmd queue is a PAGE_SIZE multiple but would be good to double-check that here. jimharris: Both cmd and cpl need to be page-aligned. Previously the contigmalloc call took care of this. | |||||
Not Done Inline ActionsThanks for the insight. I've added seat belts to the next revision scottl: Thanks for the insight. I've added seat belts to the next revision | |||||
prpmem = (uint8_t *)(queuemem + cmdsz + cplsz); | |||||
qpair->cmd_bus_addr = queuemem_phys; | |||||
qpair->cpl_bus_addr = queuemem_phys + cmdsz; | |||||
prpmem_phys = queuemem_phys + cmdsz + cplsz; | |||||
Done Inline ActionsThis should be removed before committing. jimharris: This should be removed before committing. | |||||
qpair->cmd = contigmalloc(qpair->num_entries * | |||||
sizeof(struct nvme_command), M_NVME, M_ZERO, | |||||
0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); | |||||
qpair->cpl = contigmalloc(qpair->num_entries * | |||||
sizeof(struct nvme_completion), M_NVME, M_ZERO, | |||||
0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); | |||||
err = bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map); | |||||
if (err != 0) | |||||
nvme_printf(ctrlr, "cmd_dma_map create failed %d\n", err); | |||||
err = bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map); | |||||
if (err != 0) | |||||
nvme_printf(ctrlr, "cpl_dma_map create failed %d\n", err); | |||||
bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map, | |||||
qpair->cmd, qpair->num_entries * sizeof(struct nvme_command), | |||||
nvme_single_map, &qpair->cmd_bus_addr, 0); | |||||
bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map, | |||||
qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion), | |||||
nvme_single_map, &qpair->cpl_bus_addr, 0); | |||||
qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl); | qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl); | ||||
qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl); | qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl); | ||||
TAILQ_INIT(&qpair->free_tr); | TAILQ_INIT(&qpair->free_tr); | ||||
TAILQ_INIT(&qpair->outstanding_tr); | TAILQ_INIT(&qpair->outstanding_tr); | ||||
STAILQ_INIT(&qpair->queued_req); | STAILQ_INIT(&qpair->queued_req); | ||||
list_phys = prpmem_phys; | |||||
prp_list = prpmem; | |||||
for (i = 0; i < qpair->num_trackers; i++) { | for (i = 0; i < qpair->num_trackers; i++) { | ||||
if (list_phys + prpsz > prpmem_phys + prpmemsz) { | |||||
qpair->num_trackers = i; | |||||
break; | |||||
} | |||||
/* | |||||
* Make sure that the PRP list for this tracker doesn't | |||||
* overflow to another page. | |||||
*/ | |||||
Not Done Inline ActionsThis is probably fine, since realistically we probably won't have a PRP list size that's not even divisible into a page. If anyone ever set MAXPHYS to something like 1.5MB, this would abort out after the first tracker and we'd be stuck with just one tracker per qpair though. Maybe instead you could do something like this above: prpmemsz = PAGE_SIZE * (qpair->num_trackers / (PAGE_SIZE / prpsz)); This should allocate enough space for PRP lists so that when your trunc_page check here fails, you can do a roundup2 to the next page boundary. The other option would be to punt on this for now and just assert that the PRP list divides evenly into PAGE_SIZE. jimharris: This is probably fine, since realistically we probably won't have a PRP list size that's not… | |||||
if (trunc_page(list_phys) != | |||||
trunc_page(list_phys + prpsz - 1)) { | |||||
list_phys = roundup2(list_phys, PAGE_SIZE); | |||||
prp_list = | |||||
(uint8_t *)roundup2((uintptr_t)prp_list, PAGE_SIZE); | |||||
} | |||||
tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_WAITOK); | tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_WAITOK); | ||||
nvme_qpair_construct_tracker(qpair, tr, i); | bus_dmamap_create(qpair->dma_tag_payload, 0, | ||||
&tr->payload_dma_map); | |||||
callout_init(&tr->timer, 1); | |||||
tr->cid = i; | |||||
tr->qpair = qpair; | |||||
tr->prp = (uint64_t *)prp_list; | |||||
Not Done Inline ActionsA PRP list cannot span a page boundary, so you'll need to make sure you account for that here when allocating chunks of the bigger buffer for each tracker's PRP list. jimharris: A PRP list cannot span a page boundary, so you'll need to make sure you account for that here… | |||||
tr->prp_bus_addr = list_phys; | |||||
TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq); | TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq); | ||||
list_phys += prpsz; | |||||
prp_list += prpsz; | |||||
} | } | ||||
qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries, | if (qpair->num_trackers == 0) { | ||||
M_NVME, M_ZERO | M_WAITOK); | nvme_printf(ctrlr, "failed to allocate enough trackers\n"); | ||||
goto out; | |||||
} | } | ||||
qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * | |||||
qpair->num_entries, M_NVME, M_ZERO | M_WAITOK); | |||||
return (0); | |||||
out: | |||||
nvme_qpair_destroy(qpair); | |||||
return (ENOMEM); | |||||
} | |||||
static void | static void | ||||
nvme_qpair_destroy(struct nvme_qpair *qpair) | nvme_qpair_destroy(struct nvme_qpair *qpair) | ||||
{ | { | ||||
struct nvme_tracker *tr; | struct nvme_tracker *tr; | ||||
if (qpair->tag) | if (qpair->tag) | ||||
bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag); | bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag); | ||||
if (mtx_initialized(&qpair->lock)) | |||||
mtx_destroy(&qpair->lock); | |||||
if (qpair->res) | if (qpair->res) | ||||
bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ, | bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ, | ||||
rman_get_rid(qpair->res), qpair->res); | rman_get_rid(qpair->res), qpair->res); | ||||
if (qpair->cmd) { | if (qpair->cmd != NULL) { | ||||
bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map); | bus_dmamap_unload(qpair->dma_tag, qpair->queuemem_map); | ||||
bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map); | bus_dmamem_free(qpair->dma_tag, qpair->cmd, | ||||
contigfree(qpair->cmd, | qpair->queuemem_map); | ||||
qpair->num_entries * sizeof(struct nvme_command), M_NVME); | |||||
} | } | ||||
Done Inline ActionsShould you still unload the dmamap here? jimharris: Should you still unload the dmamap here? | |||||
Not Done Inline ActionsGood catch, thanks. scottl: Good catch, thanks. | |||||
if (qpair->cpl) { | |||||
bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map); | |||||
bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map); | |||||
contigfree(qpair->cpl, | |||||
qpair->num_entries * sizeof(struct nvme_completion), | |||||
M_NVME); | |||||
} | |||||
if (qpair->dma_tag) | if (qpair->dma_tag) | ||||
bus_dma_tag_destroy(qpair->dma_tag); | bus_dma_tag_destroy(qpair->dma_tag); | ||||
if (qpair->dma_tag_payload) | if (qpair->dma_tag_payload) | ||||
bus_dma_tag_destroy(qpair->dma_tag_payload); | bus_dma_tag_destroy(qpair->dma_tag_payload); | ||||
if (qpair->act_tr) | if (qpair->act_tr) | ||||
free(qpair->act_tr, M_NVME); | free(qpair->act_tr, M_NVME); | ||||
while (!TAILQ_EMPTY(&qpair->free_tr)) { | while (!TAILQ_EMPTY(&qpair->free_tr)) { | ||||
tr = TAILQ_FIRST(&qpair->free_tr); | tr = TAILQ_FIRST(&qpair->free_tr); | ||||
TAILQ_REMOVE(&qpair->free_tr, tr, tailq); | TAILQ_REMOVE(&qpair->free_tr, tr, tailq); | ||||
bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map); | bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map); | ||||
bus_dmamap_destroy(qpair->dma_tag, tr->prp_dma_map); | |||||
free(tr, M_NVME); | free(tr, M_NVME); | ||||
} | } | ||||
} | } | ||||
static void | static void | ||||
nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair) | nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair) | ||||
{ | { | ||||
struct nvme_tracker *tr; | struct nvme_tracker *tr; | ||||
▲ Show 20 Lines • Show All 405 Lines • Show Last 20 Lines |
Nit - prplistsz would be a more accurate variable name here.