diff --git a/sys/dev/nvmf/host/nvmf_qpair.c b/sys/dev/nvmf/host/nvmf_qpair.c index 1aeb0535eacf..b03ecfa081d3 100644 --- a/sys/dev/nvmf/host/nvmf_qpair.c +++ b/sys/dev/nvmf/host/nvmf_qpair.c @@ -1,426 +1,449 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2023-2024 Chelsio Communications, Inc. * Written by: John Baldwin */ #include #include #include #include #include #include #include #include #include #include struct nvmf_host_command { struct nvmf_request *req; TAILQ_ENTRY(nvmf_host_command) link; uint16_t cid; }; struct nvmf_host_qpair { struct nvmf_softc *sc; struct nvmf_qpair *qp; bool sq_flow_control; bool shutting_down; u_int allocating; u_int num_commands; uint16_t sqhd; uint16_t sqtail; uint64_t submitted; struct mtx lock; TAILQ_HEAD(, nvmf_host_command) free_commands; STAILQ_HEAD(, nvmf_request) pending_requests; /* Indexed by cid. */ struct nvmf_host_command **active_commands; char name[16]; struct sysctl_ctx_list sysctl_ctx; }; struct nvmf_request * nvmf_allocate_request(struct nvmf_host_qpair *qp, void *sqe, nvmf_request_complete_t *cb, void *cb_arg, int how) { struct nvmf_request *req; struct nvmf_qpair *nq; KASSERT(how == M_WAITOK || how == M_NOWAIT, ("%s: invalid how", __func__)); req = malloc(sizeof(*req), M_NVMF, how | M_ZERO); if (req == NULL) return (NULL); mtx_lock(&qp->lock); nq = qp->qp; if (nq == NULL) { mtx_unlock(&qp->lock); free(req, M_NVMF); return (NULL); } qp->allocating++; MPASS(qp->allocating != 0); mtx_unlock(&qp->lock); req->qp = qp; req->cb = cb; req->cb_arg = cb_arg; req->nc = nvmf_allocate_command(nq, sqe, how); if (req->nc == NULL) { free(req, M_NVMF); req = NULL; } mtx_lock(&qp->lock); qp->allocating--; if (qp->allocating == 0 && qp->shutting_down) wakeup(qp); mtx_unlock(&qp->lock); return (req); } static void nvmf_abort_request(struct nvmf_request *req, uint16_t cid) { struct nvme_completion cqe; memset(&cqe, 0, sizeof(cqe)); cqe.cid = cid; cqe.status = htole16(NVMEF(NVME_STATUS_SCT, NVME_SCT_PATH_RELATED) | NVMEF(NVME_STATUS_SC, NVME_SC_COMMAND_ABORTED_BY_HOST)); req->cb(req->cb_arg, &cqe); } void nvmf_free_request(struct nvmf_request *req) { if (req->nc != NULL) nvmf_free_capsule(req->nc); free(req, M_NVMF); } static void nvmf_dispatch_command(struct nvmf_host_qpair *qp, struct nvmf_host_command *cmd) { struct nvmf_softc *sc = qp->sc; struct nvme_command *sqe; struct nvmf_capsule *nc; + uint16_t new_sqtail; int error; + mtx_assert(&qp->lock, MA_OWNED); + + qp->submitted++; + + /* + * Update flow control tracking. This is just a sanity check. + * Since num_commands == qsize - 1, there can never be too + * many commands in flight. + */ + new_sqtail = (qp->sqtail + 1) % (qp->num_commands + 1); + KASSERT(new_sqtail != qp->sqhd, ("%s: qp %p is full", __func__, qp)); + qp->sqtail = new_sqtail; + mtx_unlock(&qp->lock); + nc = cmd->req->nc; sqe = nvmf_capsule_sqe(nc); /* * NB: Don't bother byte-swapping the cid so that receive * doesn't have to swap. */ sqe->cid = cmd->cid; error = nvmf_transmit_capsule(nc); if (error != 0) { device_printf(sc->dev, "failed to transmit capsule: %d, disconnecting\n", error); nvmf_disconnect(sc); return; } if (sc->ka_traffic) atomic_store_int(&sc->ka_active_tx_traffic, 1); } static void nvmf_qp_error(void *arg, int error) { struct nvmf_host_qpair *qp = arg; struct nvmf_softc *sc = qp->sc; /* Ignore simple close of queue pairs during shutdown. 
*/ if (!(sc->detaching && error == 0)) device_printf(sc->dev, "error %d on %s, disconnecting\n", error, qp->name); nvmf_disconnect(sc); } static void nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc) { struct nvmf_host_qpair *qp = arg; struct nvmf_softc *sc = qp->sc; struct nvmf_host_command *cmd; struct nvmf_request *req; const struct nvme_completion *cqe; uint16_t cid; cqe = nvmf_capsule_cqe(nc); if (sc->ka_traffic) atomic_store_int(&sc->ka_active_rx_traffic, 1); /* * NB: Don't bother byte-swapping the cid as transmit doesn't * swap either. */ cid = cqe->cid; if (cid > qp->num_commands) { device_printf(sc->dev, "received invalid CID %u, disconnecting\n", cid); nvmf_disconnect(sc); nvmf_free_capsule(nc); return; } + /* Update flow control tracking. */ + mtx_lock(&qp->lock); + if (qp->sq_flow_control) { + if (nvmf_sqhd_valid(nc)) + qp->sqhd = le16toh(cqe->sqhd); + } else { + /* + * If SQ FC is disabled, just advance the head for + * each response capsule received. + */ + qp->sqhd = (qp->sqhd + 1) % (qp->num_commands + 1); + } + /* * If the queue has been shutdown due to an error, silently * drop the response. */ - mtx_lock(&qp->lock); if (qp->qp == NULL) { device_printf(sc->dev, "received completion for CID %u on shutdown %s\n", cid, qp->name); mtx_unlock(&qp->lock); nvmf_free_capsule(nc); return; } cmd = qp->active_commands[cid]; if (cmd == NULL) { mtx_unlock(&qp->lock); device_printf(sc->dev, "received completion for inactive CID %u, disconnecting\n", cid); nvmf_disconnect(sc); nvmf_free_capsule(nc); return; } KASSERT(cmd->cid == cid, ("%s: CID mismatch", __func__)); req = cmd->req; cmd->req = NULL; if (STAILQ_EMPTY(&qp->pending_requests)) { qp->active_commands[cid] = NULL; TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link); mtx_unlock(&qp->lock); } else { cmd->req = STAILQ_FIRST(&qp->pending_requests); STAILQ_REMOVE_HEAD(&qp->pending_requests, link); - qp->submitted++; - mtx_unlock(&qp->lock); nvmf_dispatch_command(qp, cmd); } req->cb(req->cb_arg, cqe); nvmf_free_capsule(nc); nvmf_free_request(req); } static void nvmf_sysctls_qp(struct nvmf_softc *sc, struct nvmf_host_qpair *qp, bool admin, u_int qid) { struct sysctl_ctx_list *ctx = &qp->sysctl_ctx; struct sysctl_oid *oid; struct sysctl_oid_list *list; char name[8]; if (admin) { oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, "adminq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue"); } else { snprintf(name, sizeof(name), "%u", qid); oid = SYSCTL_ADD_NODE(ctx, sc->ioq_oid_list, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queue"); } list = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "num_entries", CTLFLAG_RD, NULL, qp->num_commands + 1, "Number of entries in queue"); SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_head", CTLFLAG_RD, &qp->sqhd, 0, "Current head of submission queue (as observed by driver)"); SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_tail", CTLFLAG_RD, &qp->sqtail, 0, "Current tail of submission queue (as observed by driver)"); SYSCTL_ADD_U64(ctx, list, OID_AUTO, "num_cmds", CTLFLAG_RD, &qp->submitted, 0, "Number of commands submitted"); } struct nvmf_host_qpair * nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype, struct nvmf_handoff_qpair_params *handoff, const char *name, u_int qid) { struct nvmf_host_command *cmd, *ncmd; struct nvmf_host_qpair *qp; u_int i; qp = malloc(sizeof(*qp), M_NVMF, M_WAITOK | M_ZERO); qp->sc = sc; qp->sq_flow_control = handoff->sq_flow_control; qp->sqhd = handoff->sqhd; qp->sqtail = handoff->sqtail; strlcpy(qp->name, name, 
sizeof(qp->name)); mtx_init(&qp->lock, "nvmf qp", NULL, MTX_DEF); (void)sysctl_ctx_init(&qp->sysctl_ctx); /* * Allocate a spare command slot for each pending AER command * on the admin queue. */ qp->num_commands = handoff->qsize - 1; if (handoff->admin) qp->num_commands += sc->num_aer; qp->active_commands = malloc(sizeof(*qp->active_commands) * qp->num_commands, M_NVMF, M_WAITOK | M_ZERO); TAILQ_INIT(&qp->free_commands); for (i = 0; i < qp->num_commands; i++) { cmd = malloc(sizeof(*cmd), M_NVMF, M_WAITOK | M_ZERO); cmd->cid = i; TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link); } STAILQ_INIT(&qp->pending_requests); qp->qp = nvmf_allocate_qpair(trtype, false, handoff, nvmf_qp_error, qp, nvmf_receive_capsule, qp); if (qp->qp == NULL) { (void)sysctl_ctx_free(&qp->sysctl_ctx); TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) { TAILQ_REMOVE(&qp->free_commands, cmd, link); free(cmd, M_NVMF); } free(qp->active_commands, M_NVMF); mtx_destroy(&qp->lock); free(qp, M_NVMF); return (NULL); } nvmf_sysctls_qp(sc, qp, handoff->admin, qid); return (qp); } void nvmf_shutdown_qp(struct nvmf_host_qpair *qp) { struct nvmf_host_command *cmd; struct nvmf_request *req; struct nvmf_qpair *nq; mtx_lock(&qp->lock); nq = qp->qp; qp->qp = NULL; if (nq == NULL) { while (qp->shutting_down) mtx_sleep(qp, &qp->lock, 0, "nvmfqpsh", 0); mtx_unlock(&qp->lock); return; } qp->shutting_down = true; while (qp->allocating != 0) mtx_sleep(qp, &qp->lock, 0, "nvmfqpqu", 0); mtx_unlock(&qp->lock); nvmf_free_qpair(nq); /* * Abort outstanding requests. Active requests will have * their I/O completions invoked and associated capsules freed * by the transport layer via nvmf_free_qpair. Pending * requests must have their I/O completion invoked via * nvmf_abort_capsule_data. */ for (u_int i = 0; i < qp->num_commands; i++) { cmd = qp->active_commands[i]; if (cmd != NULL) { if (!cmd->req->aer) printf("%s: aborted active command %p (CID %u)\n", __func__, cmd->req, cmd->cid); /* This was freed by nvmf_free_qpair. */ cmd->req->nc = NULL; nvmf_abort_request(cmd->req, cmd->cid); nvmf_free_request(cmd->req); free(cmd, M_NVMF); } } while (!STAILQ_EMPTY(&qp->pending_requests)) { req = STAILQ_FIRST(&qp->pending_requests); STAILQ_REMOVE_HEAD(&qp->pending_requests, link); if (!req->aer) printf("%s: aborted pending command %p\n", __func__, req); nvmf_abort_capsule_data(req->nc, ECONNABORTED); nvmf_abort_request(req, 0); nvmf_free_request(req); } mtx_lock(&qp->lock); qp->shutting_down = false; mtx_unlock(&qp->lock); wakeup(qp); } void nvmf_destroy_qp(struct nvmf_host_qpair *qp) { struct nvmf_host_command *cmd, *ncmd; nvmf_shutdown_qp(qp); (void)sysctl_ctx_free(&qp->sysctl_ctx); TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) { TAILQ_REMOVE(&qp->free_commands, cmd, link); free(cmd, M_NVMF); } free(qp->active_commands, M_NVMF); mtx_destroy(&qp->lock); free(qp, M_NVMF); } void nvmf_submit_request(struct nvmf_request *req) { struct nvmf_host_qpair *qp; struct nvmf_host_command *cmd; qp = req->qp; mtx_lock(&qp->lock); if (qp->qp == NULL) { mtx_unlock(&qp->lock); printf("%s: aborted pending command %p\n", __func__, req); nvmf_abort_capsule_data(req->nc, ECONNABORTED); nvmf_abort_request(req, 0); nvmf_free_request(req); return; } cmd = TAILQ_FIRST(&qp->free_commands); if (cmd == NULL) { /* * Queue this request. Will be sent after enough * in-flight requests have completed. 
*/ STAILQ_INSERT_TAIL(&qp->pending_requests, req, link); mtx_unlock(&qp->lock); return; } TAILQ_REMOVE(&qp->free_commands, cmd, link); KASSERT(qp->active_commands[cmd->cid] == NULL, ("%s: CID already busy", __func__)); qp->active_commands[cmd->cid] = cmd; cmd->req = req; - qp->submitted++; - mtx_unlock(&qp->lock); nvmf_dispatch_command(qp, cmd); } diff --git a/sys/dev/nvmf/nvmf_transport.c b/sys/dev/nvmf/nvmf_transport.c index ea4aee8cc7ae..316d1571e61d 100644 --- a/sys/dev/nvmf/nvmf_transport.c +++ b/sys/dev/nvmf/nvmf_transport.c @@ -1,342 +1,350 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022-2024 Chelsio Communications, Inc. * Written by: John Baldwin */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Transport-independent support for fabrics queue pairs and commands. */ struct nvmf_transport { struct nvmf_transport_ops *nt_ops; volatile u_int nt_active_qpairs; SLIST_ENTRY(nvmf_transport) nt_link; }; /* nvmf_transports[nvmf_trtype] is sorted by priority */ static SLIST_HEAD(, nvmf_transport) nvmf_transports[NVMF_TRTYPE_TCP + 1]; static struct sx nvmf_transports_lock; static MALLOC_DEFINE(M_NVMF_TRANSPORT, "nvmf_xport", "NVMe over Fabrics transport"); SYSCTL_NODE(_kern, OID_AUTO, nvmf, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "NVMe over Fabrics"); static bool nvmf_supported_trtype(enum nvmf_trtype trtype) { return (trtype < nitems(nvmf_transports)); } struct nvmf_qpair * nvmf_allocate_qpair(enum nvmf_trtype trtype, bool controller, const struct nvmf_handoff_qpair_params *params, nvmf_qpair_error_t *error_cb, void *error_cb_arg, nvmf_capsule_receive_t *receive_cb, void *receive_cb_arg) { struct nvmf_transport *nt; struct nvmf_qpair *qp; if (!nvmf_supported_trtype(trtype)) return (NULL); sx_slock(&nvmf_transports_lock); SLIST_FOREACH(nt, &nvmf_transports[trtype], nt_link) { qp = nt->nt_ops->allocate_qpair(controller, params); if (qp != NULL) { refcount_acquire(&nt->nt_active_qpairs); break; } } sx_sunlock(&nvmf_transports_lock); if (qp == NULL) return (NULL); qp->nq_transport = nt; qp->nq_ops = nt->nt_ops; qp->nq_controller = controller; qp->nq_error = error_cb; qp->nq_error_arg = error_cb_arg; qp->nq_receive = receive_cb; qp->nq_receive_arg = receive_cb_arg; qp->nq_admin = params->admin; return (qp); } void nvmf_free_qpair(struct nvmf_qpair *qp) { struct nvmf_transport *nt; nt = qp->nq_transport; qp->nq_ops->free_qpair(qp); if (refcount_release(&nt->nt_active_qpairs)) wakeup(nt); } struct nvmf_capsule * nvmf_allocate_command(struct nvmf_qpair *qp, const void *sqe, int how) { struct nvmf_capsule *nc; KASSERT(how == M_WAITOK || how == M_NOWAIT, ("%s: invalid how", __func__)); nc = qp->nq_ops->allocate_capsule(qp, how); if (nc == NULL) return (NULL); nc->nc_qpair = qp; nc->nc_qe_len = sizeof(struct nvme_command); memcpy(&nc->nc_sqe, sqe, nc->nc_qe_len); /* 4.2 of NVMe base spec: Fabrics always uses SGL. 
*/ nc->nc_sqe.fuse &= ~NVMEM(NVME_CMD_PSDT); nc->nc_sqe.fuse |= NVMEF(NVME_CMD_PSDT, NVME_PSDT_SGL); return (nc); } struct nvmf_capsule * nvmf_allocate_response(struct nvmf_qpair *qp, const void *cqe, int how) { struct nvmf_capsule *nc; KASSERT(how == M_WAITOK || how == M_NOWAIT, ("%s: invalid how", __func__)); nc = qp->nq_ops->allocate_capsule(qp, how); if (nc == NULL) return (NULL); nc->nc_qpair = qp; nc->nc_qe_len = sizeof(struct nvme_completion); memcpy(&nc->nc_cqe, cqe, nc->nc_qe_len); return (nc); } int nvmf_capsule_append_data(struct nvmf_capsule *nc, struct memdesc *mem, size_t len, bool send, nvmf_io_complete_t *complete_cb, void *cb_arg) { if (nc->nc_data.io_len != 0) return (EBUSY); nc->nc_send_data = send; nc->nc_data.io_mem = *mem; nc->nc_data.io_len = len; nc->nc_data.io_complete = complete_cb; nc->nc_data.io_complete_arg = cb_arg; return (0); } void nvmf_free_capsule(struct nvmf_capsule *nc) { nc->nc_qpair->nq_ops->free_capsule(nc); } int nvmf_transmit_capsule(struct nvmf_capsule *nc) { return (nc->nc_qpair->nq_ops->transmit_capsule(nc)); } void nvmf_abort_capsule_data(struct nvmf_capsule *nc, int error) { if (nc->nc_data.io_len != 0) nvmf_complete_io_request(&nc->nc_data, 0, error); } void * nvmf_capsule_sqe(struct nvmf_capsule *nc) { KASSERT(nc->nc_qe_len == sizeof(struct nvme_command), ("%s: capsule %p is not a command capsule", __func__, nc)); return (&nc->nc_sqe); } void * nvmf_capsule_cqe(struct nvmf_capsule *nc) { KASSERT(nc->nc_qe_len == sizeof(struct nvme_completion), ("%s: capsule %p is not a response capsule", __func__, nc)); return (&nc->nc_cqe); } +bool +nvmf_sqhd_valid(struct nvmf_capsule *nc) +{ + KASSERT(nc->nc_qe_len == sizeof(struct nvme_completion), + ("%s: capsule %p is not a response capsule", __func__, nc)); + return (nc->nc_sqhd_valid); +} + uint8_t nvmf_validate_command_capsule(struct nvmf_capsule *nc) { KASSERT(nc->nc_qe_len == sizeof(struct nvme_command), ("%s: capsule %p is not a command capsule", __func__, nc)); if (NVMEV(NVME_CMD_PSDT, nc->nc_sqe.fuse) != NVME_PSDT_SGL) return (NVME_SC_INVALID_FIELD); return (nc->nc_qpair->nq_ops->validate_command_capsule(nc)); } size_t nvmf_capsule_data_len(const struct nvmf_capsule *nc) { return (nc->nc_qpair->nq_ops->capsule_data_len(nc)); } int nvmf_receive_controller_data(struct nvmf_capsule *nc, uint32_t data_offset, struct memdesc *mem, size_t len, nvmf_io_complete_t *complete_cb, void *cb_arg) { struct nvmf_io_request io; io.io_mem = *mem; io.io_len = len; io.io_complete = complete_cb; io.io_complete_arg = cb_arg; return (nc->nc_qpair->nq_ops->receive_controller_data(nc, data_offset, &io)); } u_int nvmf_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset, struct mbuf *m, size_t len) { MPASS(m_length(m, NULL) == len); return (nc->nc_qpair->nq_ops->send_controller_data(nc, data_offset, m, len)); } int nvmf_transport_module_handler(struct module *mod, int what, void *arg) { struct nvmf_transport_ops *ops = arg; struct nvmf_transport *nt, *nt2, *prev; int error; switch (what) { case MOD_LOAD: if (!nvmf_supported_trtype(ops->trtype)) { printf("NVMF: Unsupported transport %u", ops->trtype); return (EINVAL); } nt = malloc(sizeof(*nt), M_NVMF_TRANSPORT, M_WAITOK | M_ZERO); nt->nt_ops = arg; sx_xlock(&nvmf_transports_lock); if (SLIST_EMPTY(&nvmf_transports[ops->trtype])) { SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype], nt, nt_link); } else { prev = NULL; SLIST_FOREACH(nt2, &nvmf_transports[ops->trtype], nt_link) { if (ops->priority > nt2->nt_ops->priority) break; prev = nt2; } if (prev == NULL) 
SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype], nt, nt_link); else SLIST_INSERT_AFTER(prev, nt, nt_link); } sx_xunlock(&nvmf_transports_lock); return (0); case MOD_QUIESCE: if (!nvmf_supported_trtype(ops->trtype)) return (0); sx_slock(&nvmf_transports_lock); SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) { if (nt->nt_ops == ops) break; } if (nt == NULL) { sx_sunlock(&nvmf_transports_lock); return (0); } if (nt->nt_active_qpairs != 0) { sx_sunlock(&nvmf_transports_lock); return (EBUSY); } sx_sunlock(&nvmf_transports_lock); return (0); case MOD_UNLOAD: if (!nvmf_supported_trtype(ops->trtype)) return (0); sx_xlock(&nvmf_transports_lock); prev = NULL; SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) { if (nt->nt_ops == ops) break; prev = nt; } if (nt == NULL) { sx_xunlock(&nvmf_transports_lock); return (0); } if (prev == NULL) SLIST_REMOVE_HEAD(&nvmf_transports[ops->trtype], nt_link); else SLIST_REMOVE_AFTER(prev, nt_link); error = 0; while (nt->nt_active_qpairs != 0 && error == 0) error = sx_sleep(nt, &nvmf_transports_lock, PCATCH, "nftunld", 0); sx_xunlock(&nvmf_transports_lock); if (error != 0) return (error); free(nt, M_NVMF_TRANSPORT); return (0); default: return (EOPNOTSUPP); } } static int nvmf_transport_modevent(module_t mod __unused, int what, void *arg __unused) { switch (what) { case MOD_LOAD: for (u_int i = 0; i < nitems(nvmf_transports); i++) SLIST_INIT(&nvmf_transports[i]); sx_init(&nvmf_transports_lock, "nvmf transports"); return (0); default: return (EOPNOTSUPP); } } static moduledata_t nvmf_transport_mod = { "nvmf_transport", nvmf_transport_modevent, 0 }; DECLARE_MODULE(nvmf_transport, nvmf_transport_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); MODULE_VERSION(nvmf_transport, 1); diff --git a/sys/dev/nvmf/nvmf_transport.h b/sys/dev/nvmf/nvmf_transport.h index 549170b25940..bbd830eba576 100644 --- a/sys/dev/nvmf/nvmf_transport.h +++ b/sys/dev/nvmf/nvmf_transport.h @@ -1,140 +1,141 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022-2024 Chelsio Communications, Inc. * Written by: John Baldwin */ #ifndef __NVMF_TRANSPORT_H__ #define __NVMF_TRANSPORT_H__ /* * Interface used by the Fabrics host (initiator) and controller * (target) to send and receive capsules and associated data. */ #include #include struct mbuf; struct memdesc; struct nvmf_capsule; struct nvmf_connection; struct nvmf_qpair; struct nvmf_handoff_qpair_params; SYSCTL_DECL(_kern_nvmf); /* * Callback to invoke when an error occurs on a qpair. The last * parameter is an error value. If the error value is zero, the qpair * has been closed at the transport level rather than a transport * error occuring. */ typedef void nvmf_qpair_error_t(void *, int); /* Callback to invoke when a capsule is received. */ typedef void nvmf_capsule_receive_t(void *, struct nvmf_capsule *); /* * Callback to invoke when an I/O request has completed. The second * parameter is the amount of data transferred. The last parameter is * an error value which is non-zero if the request did not complete * successfully. A request with an error may complete partially. */ typedef void nvmf_io_complete_t(void *, size_t, int); /* * A queue pair represents either an Admin or I/O * submission/completion queue pair. The params contains negotiated * values passed in from userland. * * Unlike libnvmf in userland, the kernel transport interface does not * have any notion of an association. Instead, qpairs are * independent. 
*/ struct nvmf_qpair *nvmf_allocate_qpair(enum nvmf_trtype trtype, bool controller, const struct nvmf_handoff_qpair_params *params, nvmf_qpair_error_t *error_cb, void *error_cb_arg, nvmf_capsule_receive_t *receive_cb, void *receive_cb_arg); void nvmf_free_qpair(struct nvmf_qpair *qp); /* * Capsules are either commands (host -> controller) or responses * (controller -> host). A data buffer may be associated with a * command capsule. Transmitted data is not copied by this API but * instead must be preserved until the completion callback is invoked * to indicate capsule transmission has completed. */ struct nvmf_capsule *nvmf_allocate_command(struct nvmf_qpair *qp, const void *sqe, int how); struct nvmf_capsule *nvmf_allocate_response(struct nvmf_qpair *qp, const void *cqe, int how); void nvmf_free_capsule(struct nvmf_capsule *nc); int nvmf_capsule_append_data(struct nvmf_capsule *nc, struct memdesc *mem, size_t len, bool send, nvmf_io_complete_t *complete_cb, void *cb_arg); int nvmf_transmit_capsule(struct nvmf_capsule *nc); void nvmf_abort_capsule_data(struct nvmf_capsule *nc, int error); void *nvmf_capsule_sqe(struct nvmf_capsule *nc); void *nvmf_capsule_cqe(struct nvmf_capsule *nc); +bool nvmf_sqhd_valid(struct nvmf_capsule *nc); /* Controller-specific APIs. */ /* * A controller calls this function to check for any * transport-specific errors (invalid fields) in a received command * capsule. The callback returns a generic command status value: * NVME_SC_SUCCESS if no error is found. */ uint8_t nvmf_validate_command_capsule(struct nvmf_capsule *nc); /* * A controller calls this function to query the amount of data * associated with a command capsule. */ size_t nvmf_capsule_data_len(const struct nvmf_capsule *cc); /* * A controller calls this function to receive data associated with a * command capsule (e.g. the data for a WRITE command). This can * either return in-capsule data or fetch data from the host * (e.g. using a R2T PDU over TCP). The received command capsule * should be passed in 'nc'. The received data is stored in 'mem'. * If this function returns success, then the callback will be invoked * once the operation has completed. Note that the callback might be * invoked before this function returns. */ int nvmf_receive_controller_data(struct nvmf_capsule *nc, uint32_t data_offset, struct memdesc *mem, size_t len, nvmf_io_complete_t *complete_cb, void *cb_arg); /* * A controller calls this function to send data in response to a * command prior to sending a response capsule. If an error occurs, * the function returns a generic status completion code to be sent in * the following CQE. Note that the transfer might send a subset of * the data requested by nc. If the transfer succeeds, this function * can return one of the following values: * * - NVME_SC_SUCCESS: The transfer has completed successfully and the * caller should send a success CQE in a response capsule. * * - NVMF_SUCCESS_SENT: The transfer has completed successfully and * the transport layer has sent an implicit success CQE to the * remote host (e.g. the SUCCESS flag for TCP). The caller should * not send a response capsule. * * - NVMF_MORE: The transfer has completed successfully, but the * transfer did not complete the data buffer. * * The mbuf chain in 'm' is consumed by this function even if an error * is returned. */ u_int nvmf_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset, struct mbuf *m, size_t len); #define NVMF_SUCCESS_SENT 0x100 #define NVMF_MORE 0x101 #endif /* !__NVMF_TRANSPORT_H__ */
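A note for readers following the flow-control bookkeeping this patch adds to nvmf_dispatch_command() and nvmf_receive_capsule(): the head/tail arithmetic is ordinary circular-queue accounting over qsize == num_commands + 1 entries. The sketch below is a stand-alone illustrative model of that accounting, not driver code; the names sq_tracker, sq_submit, and sq_complete are hypothetical and exist only for this example.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * Model of the SQ head/tail tracking added by this patch.  At most
 * qsize - 1 commands may be outstanding (the driver caps in-flight
 * commands at num_commands == qsize - 1), which is why the assertion
 * in sq_submit() cannot fire.
 */
struct sq_tracker {
	uint16_t sqhd;		/* last head value credited back */
	uint16_t sqtail;	/* next free submission queue slot */
	uint16_t qsize;		/* number of SQ entries */
	bool	 sq_flow_control;
};

/* Advance the tail when a command is handed to the transport. */
static void
sq_submit(struct sq_tracker *t)
{
	uint16_t new_tail;

	new_tail = (t->sqtail + 1) % t->qsize;
	assert(new_tail != t->sqhd);	/* queue must never become full */
	t->sqtail = new_tail;
}

/* Advance the head when a response capsule arrives. */
static void
sq_complete(struct sq_tracker *t, bool sqhd_valid, uint16_t sqhd)
{
	if (t->sq_flow_control) {
		/* Only trust a head value the transport marked valid. */
		if (sqhd_valid)
			t->sqhd = sqhd;
	} else {
		/* No SQ flow control: each completion frees one slot. */
		t->sqhd = (t->sqhd + 1) % t->qsize;
	}
}

In the driver the same arithmetic runs under qp->lock, and the controller-reported head arrives little-endian in the CQE and is converted with le16toh() before being stored, exactly as in the nvmf_receive_capsule() hunk above.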
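The sq_head, sq_tail, and num_cmds sysctls added by nvmf_sysctls_qp() expose these counters read-only. If an operator wants to derive the host's view of SQ occupancy from the sysctl values, the usual ring arithmetic applies; the helper below is hypothetical and not part of the patch.

/*
 * Hypothetical helper: commands the host believes are in flight on a
 * queue pair, computed from the sq_head/sq_tail values exported via
 * sysctl.  qsize corresponds to the num_entries sysctl
 * (num_commands + 1).
 */
static inline uint16_t
sq_inflight(uint16_t sqhd, uint16_t sqtail, uint16_t qsize)
{
	return ((sqtail + qsize - sqhd) % qsize);
}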