Index: head/sys/dev/nvme/nvme_private.h =================================================================== --- head/sys/dev/nvme/nvme_private.h (revision 241433) +++ head/sys/dev/nvme/nvme_private.h (revision 241434) @@ -1,374 +1,375 @@ /*- * Copyright (C) 2012 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __NVME_PRIVATE_H__ #define __NVME_PRIVATE_H__ #include #include #include #include #include #include #include #include #include #include "nvme.h" #define DEVICE2SOFTC(dev) ((struct nvme_controller *) device_get_softc(dev)) MALLOC_DECLARE(M_NVME); #define CHATHAM2 #ifdef CHATHAM2 #define CHATHAM_PCI_ID 0x20118086 #define CHATHAM_CONTROL_BAR 0 #endif #define IDT_PCI_ID 0x80d0111d #define NVME_MAX_PRP_LIST_ENTRIES (128) /* * For commands requiring more than 2 PRP entries, one PRP will be * embedded in the command (prp1), and the rest of the PRP entries * will be in a list pointed to by the command (prp2). This means * that real max number of PRP entries we support is 128+1, which * results in a max xfer size of 128*PAGE_SIZE. */ #define NVME_MAX_XFER_SIZE NVME_MAX_PRP_LIST_ENTRIES * PAGE_SIZE #define NVME_ADMIN_ENTRIES (128) /* min and max are defined in admin queue attributes section of spec */ #define NVME_MIN_ADMIN_ENTRIES (2) #define NVME_MAX_ADMIN_ENTRIES (4096) #define NVME_IO_ENTRIES (1024) /* min is a reasonable value picked for the nvme(4) driver */ #define NVME_MIN_IO_ENTRIES (128) /* * NVME_MAX_IO_ENTRIES is not defined, since it is specified in CC.MQES * for each controller. 
*/

/* Default interrupt coalescing settings. */
#define NVME_INT_COAL_TIME	(0)	/* disabled */
#define NVME_INT_COAL_THRESHOLD	(0)	/* 0-based */

#define NVME_MAX_NAMESPACES	(16)	/* size of nvme_controller.ns[] */
#define NVME_MAX_CONSUMERS	(2)
#define NVME_MAX_ASYNC_EVENTS	(4)

/* Per-command timeout, in seconds, armed when a command is submitted. */
#define NVME_TIMEOUT_IN_SEC	(30)

/* Fallback used only when CACHE_LINE_SIZE is not already defined. */
#ifndef CACHE_LINE_SIZE
#define CACHE_LINE_SIZE		(64)
#endif

/*
 * A list of PRP entries together with the DMA bookkeeping needed to hand
 *  the list to the controller.  Unused lists are kept on the owning
 *  qpair's free_prp_list.
 */
struct nvme_prp_list {
	uint64_t			prp[NVME_MAX_PRP_LIST_ENTRIES];
	SLIST_ENTRY(nvme_prp_list)	slist;		/* free_prp_list linkage */
	bus_addr_t			bus_addr;	/* filled in when dma_map is loaded */
	bus_dmamap_t			dma_map;
};

/*
 * Per-command state.  A tracker is taken from (or added to) a qpair's
 *  free_tr list when a command is allocated and is returned to that list
 *  when the command completes without being retried.
 */
struct nvme_tracker {

	SLIST_ENTRY(nvme_tracker)	slist;		/* free_tr linkage */
	struct nvme_qpair		*qpair;
	struct nvme_command		cmd;		/* copied into the submission queue */
	struct callout			timer;		/* armed on submit, stopped on completion */
	bus_dmamap_t			dma_map;	/* payload mapping */
	nvme_cb_fn_t			cb_fn;		/* completion callback, may be NULL */
	void				*cb_arg;	/* first argument passed to cb_fn */
	uint32_t			payload_size;	/* dma_map is unloaded only if > 0 */
	struct nvme_prp_list		*prp_list;	/* NULL if no PRP list is attached */
	uint16_t			cid;		/* command id; index into act_tr[] */
};

/*
 * A submission/completion queue pair (used for both the admin queue and
 *  the I/O queues).
 */
struct nvme_qpair {

	struct nvme_controller	*ctrlr;
	uint32_t		id;		/* index into the doorbell register array */
	uint32_t		phase;		/* phase bit value that marks a new completion */

	uint16_t		vector;
	int			rid;		/* vector + 1 when MSI-X is enabled */
	struct resource		*res;		/* interrupt resource */
	void 			*tag;		/* interrupt handler cookie */

	uint32_t		max_xfer_size;
	uint32_t		num_entries;	/* entries in each of cmd[] and cpl[] */
	uint32_t		sq_tdbl_off;	/* register offset of SQ tail doorbell */
	uint32_t		cq_hdbl_off;	/* register offset of CQ head doorbell */

	uint32_t		sq_head;	/* taken from the last completion seen */
	uint32_t		sq_tail;
	uint32_t		cq_head;

	int64_t			num_cmds;	/* commands submitted on this qpair */
+	int64_t			num_intr_handler_calls;	/* calls to nvme_qpair_process_completions() */

	struct nvme_command	*cmd;		/* submission queue ring */
	struct nvme_completion	*cpl;		/* completion queue ring */

	bus_dma_tag_t		dma_tag;

	bus_dmamap_t		cmd_dma_map;
	uint64_t		cmd_bus_addr;

	bus_dmamap_t		cpl_dma_map;
	uint64_t		cpl_bus_addr;

	uint32_t		num_tr;		/* trackers allocated so far */
	uint32_t		num_prp_list;	/* PRP lists allocated so far */

	SLIST_HEAD(, nvme_tracker)	free_tr;

	struct nvme_tracker	**act_tr;	/* outstanding trackers, indexed by cid */

	SLIST_HEAD(, nvme_prp_list)	free_prp_list;

	struct mtx		lock __aligned(CACHE_LINE_SIZE);

} __aligned(CACHE_LINE_SIZE);

struct nvme_namespace {

	struct nvme_controller		*ctrlr;
	struct nvme_namespace_data	data;
	uint16_t			id;
	uint16_t			flags;
	struct cdev			*cdev;
};

/*
 * One of these per allocated PCI device.
*/ struct nvme_controller { device_t dev; uint32_t ready_timeout_in_ms; bus_space_tag_t bus_tag; bus_space_handle_t bus_handle; int resource_id; struct resource *resource; #ifdef CHATHAM2 bus_space_tag_t chatham_bus_tag; bus_space_handle_t chatham_bus_handle; int chatham_resource_id; struct resource *chatham_resource; #endif uint32_t msix_enabled; uint32_t force_intx; uint32_t num_io_queues; boolean_t per_cpu_io_queues; /* Fields for tracking progress during controller initialization. */ struct intr_config_hook config_hook; uint32_t ns_identified; uint32_t queues_created; /* For shared legacy interrupt. */ int rid; struct resource *res; void *tag; struct task task; struct taskqueue *taskqueue; bus_dma_tag_t hw_desc_tag; bus_dmamap_t hw_desc_map; /** maximum i/o size in bytes */ uint32_t max_xfer_size; /** interrupt coalescing time period (in microseconds) */ uint32_t int_coal_time; /** interrupt coalescing threshold */ uint32_t int_coal_threshold; struct nvme_qpair adminq; struct nvme_qpair *ioq; struct nvme_registers *regs; struct nvme_controller_data cdata; struct nvme_namespace ns[NVME_MAX_NAMESPACES]; struct cdev *cdev; boolean_t is_started; #ifdef CHATHAM2 uint64_t chatham_size; uint64_t chatham_lbas; #endif }; #define nvme_mmio_offsetof(reg) \ offsetof(struct nvme_registers, reg) #define nvme_mmio_read_4(sc, reg) \ bus_space_read_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg)) #define nvme_mmio_write_4(sc, reg, val) \ bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg), val) #define nvme_mmio_write_8(sc, reg, val) \ do { \ bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg), val & 0xFFFFFFFF); \ bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg)+4, \ (val & 0xFFFFFFFF00000000UL) >> 32); \ } while (0); #ifdef CHATHAM2 #define chatham_read_4(softc, reg) \ bus_space_read_4((softc)->chatham_bus_tag, \ (softc)->chatham_bus_handle, reg) #define chatham_write_8(sc, reg, val) \ 
do { \ bus_space_write_4((sc)->chatham_bus_tag, \ (sc)->chatham_bus_handle, reg, val & 0xffffffff); \ bus_space_write_4((sc)->chatham_bus_tag, \ (sc)->chatham_bus_handle, reg+4, \ (val & 0xFFFFFFFF00000000UL) >> 32); \ } while (0); #endif /* CHATHAM2 */ #if __FreeBSD_version < 800054 #define wmb() __asm volatile("sfence" ::: "memory") #define mb() __asm volatile("mfence" ::: "memory") #endif void nvme_ns_test(struct nvme_namespace *ns, u_long cmd, caddr_t arg); void nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr, uint8_t feature, uint32_t cdw11, void *payload, uint32_t payload_size, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr, uint8_t feature, uint32_t cdw11, void *payload, uint32_t payload_size, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_identify_controller(struct nvme_controller *ctrlr, void *payload, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_identify_namespace(struct nvme_controller *ctrlr, uint16_t nsid, void *payload, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_set_interrupt_coalescing(struct nvme_controller *ctrlr, uint32_t microseconds, uint32_t threshold, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_get_health_information_page(struct nvme_controller *ctrlr, uint32_t nsid, struct nvme_health_information_page *payload, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr, struct nvme_qpair *io_que, uint16_t vector, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_create_io_sq(struct nvme_controller *ctrlr, struct nvme_qpair *io_que, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_delete_io_cq(struct nvme_controller *ctrlr, struct nvme_qpair *io_que, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_delete_io_sq(struct nvme_controller *ctrlr, struct nvme_qpair *io_que, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_set_num_queues(struct nvme_controller *ctrlr, uint32_t num_queues, 
nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_set_asynchronous_event_config(struct nvme_controller *ctrlr, union nvme_critical_warning_state state, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_asynchronous_event_request(struct nvme_controller *ctrlr, nvme_cb_fn_t cb_fn, void *cb_arg); struct nvme_tracker * nvme_allocate_tracker(struct nvme_controller *ctrlr, boolean_t is_admin, nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t payload_size, void *payload); void nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error); int nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev); int nvme_ctrlr_reset(struct nvme_controller *ctrlr); /* ctrlr defined as void * to allow use with config_intrhook. */ void nvme_ctrlr_start(void *ctrlr_arg); void nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id, uint16_t vector, uint32_t num_entries, uint32_t max_xfer_size, struct nvme_controller *ctrlr); void nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr); void nvme_qpair_process_completions(struct nvme_qpair *qpair); struct nvme_tracker * nvme_qpair_allocate_tracker(struct nvme_qpair *qpair, boolean_t alloc_prp_list); void nvme_admin_qpair_destroy(struct nvme_qpair *qpair); void nvme_io_qpair_destroy(struct nvme_qpair *qpair); int nvme_ns_construct(struct nvme_namespace *ns, uint16_t id, struct nvme_controller *ctrlr); int nvme_ns_physio(struct cdev *dev, struct uio *uio, int ioflag); void nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr); void nvme_dump_command(struct nvme_command *cmd); void nvme_dump_completion(struct nvme_completion *cpl); static __inline void nvme_single_map(void *arg, bus_dma_segment_t *seg, int nseg, int error) { uint64_t *bus_addr = (uint64_t *)arg; *bus_addr = seg[0].ds_addr; } #endif /* __NVME_PRIVATE_H__ */ Index: head/sys/dev/nvme/nvme_qpair.c =================================================================== --- head/sys/dev/nvme/nvme_qpair.c (revision 241433) +++ 
head/sys/dev/nvme/nvme_qpair.c (revision 241434) @@ -1,422 +1,425 @@ /*- * Copyright (C) 2012 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include "nvme_private.h" static boolean_t nvme_completion_check_retry(const struct nvme_completion *cpl) { /* * TODO: spec is not clear how commands that are aborted due * to TLER will be marked. So for now, it seems * NAMESPACE_NOT_READY is the only case where we should * look at the DNR bit. 
*/ switch (cpl->sf_sct) { case NVME_SCT_GENERIC: switch (cpl->sf_sc) { case NVME_SC_NAMESPACE_NOT_READY: if (cpl->sf_dnr) return (0); else return (1); case NVME_SC_INVALID_OPCODE: case NVME_SC_INVALID_FIELD: case NVME_SC_COMMAND_ID_CONFLICT: case NVME_SC_DATA_TRANSFER_ERROR: case NVME_SC_ABORTED_POWER_LOSS: case NVME_SC_INTERNAL_DEVICE_ERROR: case NVME_SC_ABORTED_BY_REQUEST: case NVME_SC_ABORTED_SQ_DELETION: case NVME_SC_ABORTED_FAILED_FUSED: case NVME_SC_ABORTED_MISSING_FUSED: case NVME_SC_INVALID_NAMESPACE_OR_FORMAT: case NVME_SC_COMMAND_SEQUENCE_ERROR: case NVME_SC_LBA_OUT_OF_RANGE: case NVME_SC_CAPACITY_EXCEEDED: default: return (0); } case NVME_SCT_COMMAND_SPECIFIC: case NVME_SCT_MEDIA_ERROR: case NVME_SCT_VENDOR_SPECIFIC: default: return (0); } } struct nvme_tracker * nvme_qpair_allocate_tracker(struct nvme_qpair *qpair, boolean_t alloc_prp_list) { struct nvme_tracker *tr; struct nvme_prp_list *prp_list; mtx_lock(&qpair->lock); tr = SLIST_FIRST(&qpair->free_tr); if (tr == NULL) { /* TODO: fail if malloc returns NULL */ tr = malloc(sizeof(struct nvme_tracker), M_NVME, M_ZERO | M_NOWAIT); bus_dmamap_create(qpair->dma_tag, 0, &tr->dma_map); callout_init_mtx(&tr->timer, &qpair->lock, 0); tr->cid = qpair->num_tr++; } else SLIST_REMOVE_HEAD(&qpair->free_tr, slist); if (alloc_prp_list) { prp_list = SLIST_FIRST(&qpair->free_prp_list); if (prp_list == NULL) { prp_list = malloc(sizeof(struct nvme_prp_list), M_NVME, M_ZERO | M_NOWAIT); bus_dmamap_create(qpair->dma_tag, 0, &prp_list->dma_map); bus_dmamap_load(qpair->dma_tag, prp_list->dma_map, prp_list->prp, sizeof(struct nvme_prp_list), nvme_single_map, &prp_list->bus_addr, 0); qpair->num_prp_list++; } else { SLIST_REMOVE_HEAD(&qpair->free_prp_list, slist); } tr->prp_list = prp_list; } return (tr); } void nvme_qpair_process_completions(struct nvme_qpair *qpair) { struct nvme_tracker *tr; struct nvme_completion *cpl; boolean_t retry, error; + qpair->num_intr_handler_calls++; + while (1) { cpl = 
&qpair->cpl[qpair->cq_head]; if (cpl->p != qpair->phase) break; tr = qpair->act_tr[cpl->cid]; KASSERT(tr, ("completion queue has entries but no active trackers\n")); error = cpl->sf_sc || cpl->sf_sct; retry = error && nvme_completion_check_retry(cpl); if (error) { nvme_dump_completion(cpl); nvme_dump_command(&tr->cmd); } qpair->act_tr[cpl->cid] = NULL; KASSERT(cpl->cid == tr->cmd.cid, ("cpl cid does not match cmd cid\n")); if (tr->cb_fn && !retry) tr->cb_fn(tr->cb_arg, cpl); qpair->sq_head = cpl->sqhd; mtx_lock(&qpair->lock); callout_stop(&tr->timer); if (retry) /* nvme_qpair_submit_cmd() will release the lock. */ nvme_qpair_submit_cmd(qpair, tr); else { if (tr->prp_list) { SLIST_INSERT_HEAD(&qpair->free_prp_list, tr->prp_list, slist); tr->prp_list = NULL; } if (tr->payload_size > 0) bus_dmamap_unload(qpair->dma_tag, tr->dma_map); SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist); mtx_unlock(&qpair->lock); } if (++qpair->cq_head == qpair->num_entries) { qpair->cq_head = 0; qpair->phase = !qpair->phase; } nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl, qpair->cq_head); } } static void nvme_qpair_msix_handler(void *arg) { struct nvme_qpair *qpair = arg; nvme_qpair_process_completions(qpair); } void nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id, uint16_t vector, uint32_t num_entries, uint32_t max_xfer_size, struct nvme_controller *ctrlr) { qpair->id = id; qpair->vector = vector; qpair->num_entries = num_entries; qpair->max_xfer_size = max_xfer_size; qpair->ctrlr = ctrlr; /* * First time through the completion queue, HW will set phase * bit on completions to 1. So set this to 1 here, indicating * we're looking for a 1 to know which entries have completed. * we'll toggle the bit each time when the completion queue * rolls over. */ qpair->phase = 1; if (ctrlr->msix_enabled) { /* * MSI-X vector resource IDs start at 1, so we add one to * the queue's vector to get the corresponding rid to use. 
*/ qpair->rid = vector + 1; qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ, &qpair->rid, RF_ACTIVE); bus_setup_intr(ctrlr->dev, qpair->res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_qpair_msix_handler, qpair, &qpair->tag); } mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF); bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size, (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0, NULL, NULL, &qpair->dma_tag); qpair->num_cmds = 0; + qpair->num_intr_handler_calls = 0; qpair->num_tr = 0; qpair->num_prp_list = 0; qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0; /* TODO: error checking on contigmalloc, bus_dmamap_load calls */ qpair->cmd = contigmalloc(qpair->num_entries * sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT, 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); qpair->cpl = contigmalloc(qpair->num_entries * sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT, 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map); bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map); bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map, qpair->cmd, qpair->num_entries * sizeof(struct nvme_command), nvme_single_map, &qpair->cmd_bus_addr, 0); bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map, qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion), nvme_single_map, &qpair->cpl_bus_addr, 0); qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl); qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl); SLIST_INIT(&qpair->free_tr); SLIST_INIT(&qpair->free_prp_list); qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries, M_NVME, M_ZERO | M_NOWAIT); } static void nvme_qpair_destroy(struct nvme_qpair *qpair) { struct nvme_tracker *tr; struct nvme_prp_list *prp_list; if (qpair->tag) bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag); if (qpair->res) 
bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ, rman_get_rid(qpair->res), qpair->res); if (qpair->dma_tag) bus_dma_tag_destroy(qpair->dma_tag); if (qpair->act_tr) free(qpair->act_tr, M_NVME); while (!SLIST_EMPTY(&qpair->free_tr)) { tr = SLIST_FIRST(&qpair->free_tr); SLIST_REMOVE_HEAD(&qpair->free_tr, slist); bus_dmamap_destroy(qpair->dma_tag, tr->dma_map); free(tr, M_NVME); } while (!SLIST_EMPTY(&qpair->free_prp_list)) { prp_list = SLIST_FIRST(&qpair->free_prp_list); SLIST_REMOVE_HEAD(&qpair->free_prp_list, slist); bus_dmamap_destroy(qpair->dma_tag, prp_list->dma_map); free(prp_list, M_NVME); } } void nvme_admin_qpair_destroy(struct nvme_qpair *qpair) { /* * For NVMe, you don't send delete queue commands for the admin * queue, so we just need to unload and free the cmd and cpl memory. */ bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map); bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map); contigfree(qpair->cmd, qpair->num_entries * sizeof(struct nvme_command), M_NVME); bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map); bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map); contigfree(qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion), M_NVME); nvme_qpair_destroy(qpair); } static void nvme_free_cmd_ring(void *arg, const struct nvme_completion *status) { struct nvme_qpair *qpair; qpair = (struct nvme_qpair *)arg; bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map); bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map); contigfree(qpair->cmd, qpair->num_entries * sizeof(struct nvme_command), M_NVME); qpair->cmd = NULL; } static void nvme_free_cpl_ring(void *arg, const struct nvme_completion *status) { struct nvme_qpair *qpair; qpair = (struct nvme_qpair *)arg; bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map); bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map); contigfree(qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion), M_NVME); qpair->cpl = NULL; } void nvme_io_qpair_destroy(struct nvme_qpair *qpair) { struct 
nvme_controller *ctrlr = qpair->ctrlr; if (qpair->num_entries > 0) { nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_free_cmd_ring, qpair); /* Spin until free_cmd_ring sets qpair->cmd to NULL. */ while (qpair->cmd) DELAY(5); nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_free_cpl_ring, qpair); /* Spin until free_cpl_ring sets qpair->cmd to NULL. */ while (qpair->cpl) DELAY(5); nvme_qpair_destroy(qpair); } } static void nvme_timeout(void *arg) { /* * TODO: Add explicit abort operation here, once nvme(4) supports * abort commands. */ } void nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr) { tr->cmd.cid = tr->cid; qpair->act_tr[tr->cid] = tr; /* * TODO: rather than spin until entries free up, put this tracker * on a queue, and submit from the interrupt handler when * entries free up. */ if ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head) { do { mtx_unlock(&qpair->lock); DELAY(5); mtx_lock(&qpair->lock); } while ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head); } callout_reset(&tr->timer, NVME_TIMEOUT_IN_SEC * hz, nvme_timeout, tr); /* Copy the command from the tracker to the submission queue. */ memcpy(&qpair->cmd[qpair->sq_tail], &tr->cmd, sizeof(tr->cmd)); if (++qpair->sq_tail == qpair->num_entries) qpair->sq_tail = 0; wmb(); nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl, qpair->sq_tail); qpair->num_cmds++; mtx_unlock(&qpair->lock); } Index: head/sys/dev/nvme/nvme_sysctl.c =================================================================== --- head/sys/dev/nvme/nvme_sysctl.c (revision 241433) +++ head/sys/dev/nvme/nvme_sysctl.c (revision 241434) @@ -1,187 +1,278 @@ /*- * Copyright (C) 2012 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include "nvme_private.h" +/* + * CTLTYPE_S64 and sysctl_handle_64 were added in r217616. Define these + * explicitly here for older kernels that don't include the r217616 + * changeset. 
+ */ +#ifndef CTLTYPE_S64 +#define CTLTYPE_S64 CTLTYPE_QUAD +#define sysctl_handle_64 sysctl_handle_quad +#endif + static void nvme_dump_queue(struct nvme_qpair *qpair) { struct nvme_completion *cpl; struct nvme_command *cmd; int i; printf("id:%04Xh phase:%d\n", qpair->id, qpair->phase); printf("Completion queue:\n"); for (i = 0; i < qpair->num_entries; i++) { cpl = &qpair->cpl[i]; printf("%05d: ", i); nvme_dump_completion(cpl); } printf("Submission queue:\n"); for (i = 0; i < qpair->num_entries; i++) { cmd = &qpair->cmd[i]; printf("%05d: ", i); nvme_dump_command(cmd); } } static int nvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS) { struct nvme_qpair *qpair = arg1; uint32_t val = 0; int error = sysctl_handle_int(oidp, &val, 0, req); if (error) return (error); if (val != 0) nvme_dump_queue(qpair); return (0); } static int nvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS) { struct nvme_controller *ctrlr = arg1; uint32_t oldval = ctrlr->int_coal_time; int error = sysctl_handle_int(oidp, &ctrlr->int_coal_time, 0, req); if (error) return (error); if (oldval != ctrlr->int_coal_time) nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL, NULL); return (0); } static int nvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS) { struct nvme_controller *ctrlr = arg1; uint32_t oldval = ctrlr->int_coal_threshold; int error = sysctl_handle_int(oidp, &ctrlr->int_coal_threshold, 0, req); if (error) return (error); if (oldval != ctrlr->int_coal_threshold) nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL, NULL); return (0); } static void +nvme_qpair_reset_stats(struct nvme_qpair *qpair) +{ + + qpair->num_cmds = 0; + qpair->num_intr_handler_calls = 0; +} + +static int +nvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS) +{ + struct nvme_controller *ctrlr = arg1; + int64_t num_cmds = 0; + int i; + + num_cmds = ctrlr->adminq.num_cmds; + + for (i = 0; i < ctrlr->num_io_queues; i++) + num_cmds += 
ctrlr->ioq[i].num_cmds; + + return (sysctl_handle_64(oidp, &num_cmds, 0, req)); +} + +static int +nvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS) +{ + struct nvme_controller *ctrlr = arg1; + int64_t num_intr_handler_calls = 0; + int i; + + num_intr_handler_calls = ctrlr->adminq.num_intr_handler_calls; + + for (i = 0; i < ctrlr->num_io_queues; i++) + num_intr_handler_calls += ctrlr->ioq[i].num_intr_handler_calls; + + return (sysctl_handle_64(oidp, &num_intr_handler_calls, 0, req)); +} + +static int +nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS) +{ + struct nvme_controller *ctrlr = arg1; + uint32_t i, val = 0; + + int error = sysctl_handle_int(oidp, &val, 0, req); + + if (error) + return (error); + + if (val != 0) { + nvme_qpair_reset_stats(&ctrlr->adminq); + + for (i = 0; i < ctrlr->num_io_queues; i++) + nvme_qpair_reset_stats(&ctrlr->ioq[i]); + } + + return (0); +} + + +static void nvme_sysctl_initialize_queue(struct nvme_qpair *qpair, struct sysctl_ctx_list *ctrlr_ctx, struct sysctl_oid *que_tree) { struct sysctl_oid_list *que_list = SYSCTL_CHILDREN(que_tree); SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_entries", CTLFLAG_RD, &qpair->num_entries, 0, "Number of entries in hardware queue"); SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_tr", CTLFLAG_RD, &qpair->num_tr, 0, "Number of trackers allocated"); SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_prp_list", CTLFLAG_RD, &qpair->num_prp_list, 0, "Number of PRP lists allocated"); SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_head", CTLFLAG_RD, &qpair->sq_head, 0, "Current head of submission queue (as observed by driver)"); SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_tail", CTLFLAG_RD, &qpair->sq_tail, 0, "Current tail of submission queue (as observed by driver)"); SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "cq_head", CTLFLAG_RD, &qpair->cq_head, 0, "Current head of completion queue (as observed by driver)"); SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_cmds", 
CTLFLAG_RD, &qpair->num_cmds, "Number of commands submitted"); + SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_intr_handler_calls", + CTLFLAG_RD, &qpair->num_intr_handler_calls, + "Number of times interrupt handler was invoked (will typically be " + "less than number of actual interrupts generated due to " + "coalescing)"); SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO, "dump_debug", CTLTYPE_UINT | CTLFLAG_RW, qpair, 0, nvme_sysctl_dump_debug, "IU", "Dump debug data"); } void nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr) { struct sysctl_ctx_list *ctrlr_ctx; struct sysctl_oid *ctrlr_tree, *que_tree; struct sysctl_oid_list *ctrlr_list; #define QUEUE_NAME_LENGTH 16 char queue_name[QUEUE_NAME_LENGTH]; int i; ctrlr_ctx = device_get_sysctl_ctx(ctrlr->dev); ctrlr_tree = device_get_sysctl_tree(ctrlr->dev); ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree); if (ctrlr->is_started) { SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0, nvme_sysctl_int_coal_time, "IU", "Interrupt coalescing timeout (in microseconds)"); SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, "int_coal_threshold", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0, nvme_sysctl_int_coal_threshold, "IU", "Interrupt coalescing threshold"); + + SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, + "num_cmds", CTLTYPE_S64 | CTLFLAG_RD, + ctrlr, 0, nvme_sysctl_num_cmds, "IU", + "Number of commands submitted"); + + SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, + "num_intr_handler_calls", CTLTYPE_S64 | CTLFLAG_RD, + ctrlr, 0, nvme_sysctl_num_intr_handler_calls, "IU", + "Number of times interrupt handler was invoked (will " + "typically be less than number of actual interrupts " + "generated due to coalescing)"); + + SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, + "reset_stats", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0, + nvme_sysctl_reset_stats, "IU", "Reset statistics to zero"); } que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, "adminq", CTLFLAG_RD, 
NULL, "Admin Queue"); nvme_sysctl_initialize_queue(&ctrlr->adminq, ctrlr_ctx, que_tree); for (i = 0; i < ctrlr->num_io_queues; i++) { snprintf(queue_name, QUEUE_NAME_LENGTH, "ioq%d", i); que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, queue_name, CTLFLAG_RD, NULL, "IO Queue"); nvme_sysctl_initialize_queue(&ctrlr->ioq[i], ctrlr_ctx, que_tree); } }