Index: head/sys/dev/iser/icl_iser.c
===================================================================
--- head/sys/dev/iser/icl_iser.c	(revision 300726)
+++ head/sys/dev/iser/icl_iser.c	(revision 300727)
@@ -1,582 +1,561 @@
 /* $FreeBSD$ */
 /*-
  * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */

 #include "icl_iser.h"

 SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW, 0, "iSER module");
 int iser_debug = 0;
 SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
     &iser_debug, 0, "Enable iser debug messages");

 static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
 static uma_zone_t icl_pdu_zone;

 static volatile u_int icl_iser_ncons;
 struct iser_global ig;

+static void iser_conn_release(struct icl_conn *ic);
+
 static icl_conn_new_pdu_t	iser_conn_new_pdu;
 static icl_conn_pdu_free_t	iser_conn_pdu_free;
 static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
 static icl_conn_pdu_append_data_t	iser_conn_pdu_append_data;
 static icl_conn_pdu_queue_t	iser_conn_pdu_queue;
 static icl_conn_handoff_t	iser_conn_handoff;
 static icl_conn_free_t	iser_conn_free;
 static icl_conn_close_t	iser_conn_close;
-static icl_conn_release_t	iser_conn_release;
 static icl_conn_connect_t	iser_conn_connect;
-static icl_conn_connected_t	iser_conn_connected;
 static icl_conn_task_setup_t	iser_conn_task_setup;
 static icl_conn_task_done_t	iser_conn_task_done;
 static icl_conn_pdu_get_data_t	iser_conn_pdu_get_data;

 static kobj_method_t icl_iser_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
	KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
	KOBJMETHOD(icl_conn_free, iser_conn_free),
	KOBJMETHOD(icl_conn_close, iser_conn_close),
-	KOBJMETHOD(icl_conn_release, iser_conn_release),
	KOBJMETHOD(icl_conn_connect, iser_conn_connect),
-	KOBJMETHOD(icl_conn_connected, iser_conn_connected),
	KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
	KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
	{ 0, 0 }
 };

 DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));

 /**
  * iser_initialize_headers() - Initialize task headers
  * @pdu:       iser pdu
  * @iser_conn: iser connection
  *
  * Notes:
  * This routine may race with the iser teardown flow for SCSI
  * error-handling TMFs, so for a TMF we should acquire the
  * state mutex to avoid dereferencing the IB device, which
  * may have already been terminated (racing teardown sequence).
  */
 int
 iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
 {
	struct iser_tx_desc *tx_desc = &pdu->desc;
	struct iser_device *device = iser_conn->ib_conn.device;
	u64 dma_addr;
	int ret = 0;

	dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
				ISER_HEADERS_LEN, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
		ret = -ENOMEM;
		goto out;
	}

	tx_desc->mapped = true;
	tx_desc->dma_addr = dma_addr;
	tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
	tx_desc->tx_sg[0].lkey = device->mr->lkey;

out:
	return (ret);
 }

 int
 iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
			  const void *addr, size_t len, int flags)
 {
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	if (request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
	    request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_TEXT_REQUEST) {
		ISER_DBG("copy to login buff");
		memcpy(iser_conn->login_req_buf, addr, len);
		request->ip_data_len = len;
	}

	return (0);
 }

 void
 iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
		       size_t off, void *addr, size_t len)
 {
	/* If we have received data, copy it to the upper layer buffer */
	if (ip->ip_data_mbuf)
		memcpy(addr, ip->ip_data_mbuf + off, len);
 }

 /*
  * Allocate icl_pdu with empty BHS to fill up by the caller.
  */
 struct icl_pdu *
 iser_new_pdu(struct icl_conn *ic, int flags)
 {
	struct icl_iser_pdu *iser_pdu;
	struct icl_pdu *ip;
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
	if (iser_pdu == NULL) {
		ISER_WARN("failed to allocate %zd bytes", sizeof(*iser_pdu));
		return (NULL);
	}

	iser_pdu->iser_conn = iser_conn;
	ip = &iser_pdu->icl_pdu;
	ip->ip_conn = ic;
	ip->ip_bhs = &iser_pdu->desc.iscsi_header;

	return (ip);
 }

 struct icl_pdu *
 iser_conn_new_pdu(struct icl_conn *ic, int flags)
 {
	return (iser_new_pdu(ic, flags));
 }

 void
 iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
 {
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	uma_zfree(icl_pdu_zone, iser_pdu);
 }

 size_t
 iser_conn_pdu_data_segment_length(struct icl_conn *ic,
				  const struct icl_pdu *request)
 {
	uint32_t len = 0;

	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
 }

 void
 iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
 {
	iser_pdu_free(ic, ip);
 }

 static bool
 is_control_opcode(uint8_t opcode)
 {
	bool is_control = false;

	switch (opcode & ISCSI_OPCODE_MASK) {
	case ISCSI_BHS_OPCODE_NOP_OUT:
	case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
	case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
	case ISCSI_BHS_OPCODE_TEXT_REQUEST:
		is_control = true;
		break;
	case ISCSI_BHS_OPCODE_SCSI_COMMAND:
		is_control = false;
		break;
	default:
		ISER_ERR("unknown opcode %d", opcode);
	}

	return (is_control);
 }
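iser_conn_pdu_data_segment_length() folds the three-byte DataSegmentLength field of the iSCSI BHS into a host integer, most-significant byte first. A standalone sketch of that arithmetic (the bhs[] values here are hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
    	/* bhs_data_segment_len is a 3-byte big-endian field in the BHS */
    	uint8_t bhs[3] = { 0x00, 0x20, 0x00 };	/* hypothetical: 8192 bytes */
    	uint32_t len = 0;

    	len += bhs[0];
    	len <<= 8;
    	len += bhs[1];
    	len <<= 8;
    	len += bhs[2];

    	printf("data segment length = %u\n", len);	/* prints 8192 */
    	return (0);
    }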
 void
 iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
 {
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	int ret;

	ret = iser_initialize_headers(iser_pdu, iser_conn);
	if (ret) {
		ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
		return;
	}

	if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
		ret = iser_send_control(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send control pdu %p", iser_pdu);
	} else {
		ret = iser_send_command(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send command pdu %p", iser_pdu);
	}
 }

 static struct icl_conn *
 iser_new_conn(const char *name, struct mtx *lock)
 {
	struct iser_conn *iser_conn;
	struct icl_conn *ic;

	refcount_acquire(&icl_iser_ncons);

	iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class, M_ICL_ISER,
						    M_WAITOK | M_ZERO);
	if (!iser_conn) {
		ISER_ERR("failed to allocate iser conn");
		refcount_release(&icl_iser_ncons);
		return (NULL);
	}

	cv_init(&iser_conn->up_cv, "iser_cv");
	sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
	mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "flush_lock", NULL, MTX_DEF);
	cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
	mtx_init(&iser_conn->ib_conn.lock, "lock", NULL, MTX_DEF);

	ic = &iser_conn->icl_conn;
	ic->ic_lock = lock;
	ic->ic_name = name;
-	ic->ic_driver = strdup("iser", M_TEMP);
+	ic->ic_offload = strdup("iser", M_TEMP);
	ic->ic_iser = true;
+	ic->ic_unmapped = true;

	return (ic);
 }

 void
 iser_conn_free(struct icl_conn *ic)
 {
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

+	iser_conn_release(ic);
	cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
	mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
	sx_destroy(&iser_conn->state_mutex);
	cv_destroy(&iser_conn->up_cv);
	kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
	refcount_release(&icl_iser_ncons);
 }

 int
-iser_conn_handoff(struct icl_conn *ic, int cmds_max)
+iser_conn_handoff(struct icl_conn *ic, int fd)
 {
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	int error = 0;

	sx_xlock(&iser_conn->state_mutex);
	if (iser_conn->state != ISER_CONN_UP) {
		error = EINVAL;
		ISER_ERR("iser_conn %p state is %d, teardown started\n",
			 iser_conn, iser_conn->state);
		goto out;
	}

-	/*
-	 * In discovery session no need to allocate rx desc and posting recv
-	 * work request
-	 */
-	if (ic->ic_session_type_discovery(ic))
-		goto out;
-
-	error = iser_alloc_rx_descriptors(iser_conn, cmds_max);
+	error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
	if (error)
		goto out;

	error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
	if (error)
		goto post_error;

+	iser_conn->handoff_done = true;
+
	sx_xunlock(&iser_conn->state_mutex);
	return (error);

post_error:
	iser_free_rx_descriptors(iser_conn);
out:
	sx_xunlock(&iser_conn->state_mutex);
	return (error);
 }
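After this change the receive ring is sized from ic_maxtags rather than a caller-supplied cmds_max, and the allocator (iser_alloc_rx_descriptors() in iser_initiator.c below) keeps a quarter of the ring as the low-water mark it tries to keep posted. A small sketch of that sizing arithmetic, assuming the default of ISER_DEF_XMIT_CMDS_MAX (256) tags:

    #include <stdio.h>

    #define ISER_DEF_XMIT_CMDS_MAX	256	/* from icl_iser.h */

    int
    main(void)
    {
    	int cmds_max = ISER_DEF_XMIT_CMDS_MAX;	/* stands in for ic->ic_maxtags */
    	int qp_max_recv_dtos = cmds_max;	/* one recv descriptor per tag */
    	int min_posted_rx = qp_max_recv_dtos >> 2;	/* low-water mark */

    	printf("recv ring: %d descriptors, repost below %d posted\n",
    	    qp_max_recv_dtos, min_posted_rx);	/* 256, 64 */
    	return (0);
    }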
 /**
  * Frees all conn objects
  */
-void
+static void
 iser_conn_release(struct icl_conn *ic)
 {
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_conn *curr, *tmp;

	mtx_lock(&ig.connlist_mutex);
	/*
	 * Search for the iser connection in the global list.
	 * It may not be there in case of a failure during the connection
	 * establishment stage.
	 */
	list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
		if (iser_conn == curr) {
			ISER_WARN("found iser_conn %p", iser_conn);
			list_del(&iser_conn->conn_list);
		}
	}
	mtx_unlock(&ig.connlist_mutex);

	/*
	 * In case we are reconnecting or removing the session, we need to
	 * release the IB resources (which is safe to call more than once).
	 */
	sx_xlock(&iser_conn->state_mutex);
	iser_free_ib_conn_res(iser_conn, true);
	sx_xunlock(&iser_conn->state_mutex);

	if (ib_conn->cma_id != NULL) {
		rdma_destroy_id(ib_conn->cma_id);
		ib_conn->cma_id = NULL;
	}
 }

 void
 iser_conn_close(struct icl_conn *ic)
 {
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	ISER_INFO("closing conn %p", iser_conn);

	sx_xlock(&iser_conn->state_mutex);
	/*
	 * In case the iser connection is waiting on the condition variable
	 * (state PENDING) and we try to close it before the connection is
	 * established, we need to signal it to continue releasing the
	 * connection properly.
	 */
	if (!iser_conn_terminate(iser_conn) &&
	    iser_conn->state == ISER_CONN_PENDING)
		cv_signal(&iser_conn->up_cv);
	sx_xunlock(&iser_conn->state_mutex);
 }

 int
 iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
		   int protocol, struct sockaddr *from_sa,
		   struct sockaddr *to_sa)
 {
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	int err = 0;

+	iser_conn_release(ic);
+
	sx_xlock(&iser_conn->state_mutex);
	/* the device is known only --after-- address resolution */
	ib_conn->device = NULL;
+	iser_conn->handoff_done = false;

	iser_conn->state = ISER_CONN_PENDING;

	ib_conn->cma_id = rdma_create_id(iser_cma_handler, (void *)iser_conn,
					 RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = -PTR_ERR(ib_conn->cma_id);
		ISER_ERR("rdma_create_id failed: %d", err);
		goto id_failure;
	}

	err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
	if (err) {
		ISER_ERR("rdma_resolve_addr failed: %d", err);
		if (err < 0)
			err = -err;
		goto addr_failure;
	}

	ISER_DBG("before cv_wait: %p", iser_conn);
	cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
	ISER_DBG("after cv_wait: %p", iser_conn);

	if (iser_conn->state != ISER_CONN_UP) {
		err = EIO;
		goto addr_failure;
	}

	err = iser_alloc_login_buf(iser_conn);
	if (err)
		goto addr_failure;
	sx_xunlock(&iser_conn->state_mutex);

	mtx_lock(&ig.connlist_mutex);
	list_add(&iser_conn->conn_list, &ig.connlist);
	mtx_unlock(&ig.connlist_mutex);

	return (0);

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	sx_xunlock(&iser_conn->state_mutex);
	return (err);
 }
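The connect path above blocks on up_cv, under the state mutex, until the RDMA CM event handler moves the connection to ISER_CONN_UP (or a teardown signals it). A userspace analog of that pattern using pthreads, for illustration only (names are hypothetical; this analog also adds the conventional predicate loop around the wait, which the kernel code relies on its state machine to avoid):

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    enum state { PENDING, UP };	/* mirrors the ISER_CONN_* states */

    static enum state conn_state = PENDING;
    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t up_cv = PTHREAD_COND_INITIALIZER;

    /* stands in for iser_cma_handler() reporting "established" */
    static void *
    cm_handler(void *arg)
    {
    	sleep(1);			/* pretend address/route resolution */
    	pthread_mutex_lock(&state_lock);
    	conn_state = UP;
    	pthread_cond_signal(&up_cv);	/* wake the connecting thread */
    	pthread_mutex_unlock(&state_lock);
    	return (NULL);
    }

    int
    main(void)
    {
    	pthread_t t;

    	pthread_create(&t, NULL, cm_handler, NULL);
    	pthread_mutex_lock(&state_lock);
    	while (conn_state != UP)	/* cv_wait() under the state lock */
    		pthread_cond_wait(&up_cv, &state_lock);
    	pthread_mutex_unlock(&state_lock);
    	printf("connection is up\n");
    	pthread_join(t, NULL);
    	return (0);
    }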
-/**
- * Called with session spinlock held.
- * No need to lock state mutex on an advisory check.
- **/
-bool
-iser_conn_connected(struct icl_conn *ic)
-{
-	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
-
-	return (iser_conn->state == ISER_CONN_UP);
-}
-
 int
-iser_conn_task_setup(struct icl_conn *ic, struct ccb_scsiio *csio,
-		     uint32_t *task_tagp, void **prvp, struct icl_pdu *ip)
+iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
+		     struct ccb_scsiio *csio,
+		     uint32_t *task_tagp, void **prvp)
 {
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	*prvp = ip;
	iser_pdu->csio = csio;

	return (0);
 }

 void
 iser_conn_task_done(struct icl_conn *ic, void *prv)
 {
	struct icl_pdu *ip = prv;
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;

	if (iser_pdu->dir[ISER_DIR_IN]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
		iser_dma_unmap_task_data(iser_pdu,
					 &iser_pdu->data[ISER_DIR_IN],
					 DMA_FROM_DEVICE);
	}

	if (iser_pdu->dir[ISER_DIR_OUT]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
		iser_dma_unmap_task_data(iser_pdu,
					 &iser_pdu->data[ISER_DIR_OUT],
					 DMA_TO_DEVICE);
	}

	if (likely(tx_desc->mapped)) {
		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
				    ISER_HEADERS_LEN, DMA_TO_DEVICE);
		tx_desc->mapped = false;
	}

	iser_pdu_free(ic, ip);
 }

-static u_int32_t
-iser_hba_misc()
-{
-	return (PIM_UNMAPPED);
-}
-
 static int
 iser_limits(size_t *limitp)
 {
	*limitp = 128 * 1024;

	return (0);
 }

 static int
 icl_iser_load(void)
 {
	int error;

	ISER_DBG("Starting iSER datamover...");

	icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
				   NULL, NULL, NULL, NULL,
				   UMA_ALIGN_PTR, 0);
	/* FIXME: Check rc */

	refcount_init(&icl_iser_ncons, 0);

-	error = icl_register("iser", 0, iser_limits, iser_new_conn, iser_hba_misc);
+	error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
	KASSERT(error == 0, ("failed to register iser"));

	memset(&ig, 0, sizeof(struct iser_global));

	/* device init is called only after the first addr resolution */
	sx_init(&ig.device_list_mutex, "global_device_lock");
	INIT_LIST_HEAD(&ig.device_list);
	mtx_init(&ig.connlist_mutex, "global_conn_lock", NULL, MTX_DEF);
	INIT_LIST_HEAD(&ig.connlist);
	sx_init(&ig.close_conns_mutex, "global_close_conns_lock");

	return (error);
 }

 static int
 icl_iser_unload(void)
 {
	ISER_DBG("Removing iSER datamover...");

	if (icl_iser_ncons != 0)
		return (EBUSY);

	sx_destroy(&ig.close_conns_mutex);
	mtx_destroy(&ig.connlist_mutex);
	sx_destroy(&ig.device_list_mutex);

-	icl_unregister("iser");
+	icl_unregister("iser", true);

	uma_zdestroy(icl_pdu_zone);

	return (0);
 }

 static int
 icl_iser_modevent(module_t mod, int what, void *arg)
 {
	switch (what) {
	case MOD_LOAD:
		return (icl_iser_load());
	case MOD_UNLOAD:
		return (icl_iser_unload());
	default:
		return (EINVAL);
	}
 }

 moduledata_t icl_iser_data = {
	.name = "icl_iser",
	.evhand = icl_iser_modevent,
	.priv = 0
 };

 DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
 MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
-MODULE_DEPEND(icl_iser, iscsi, 1, 1, 1);
 MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
 MODULE_DEPEND(icl_iser, linuxkpi, 1, 1, 1);
 MODULE_VERSION(icl_iser, 1);
-
Index: head/sys/dev/iser/icl_iser.h
===================================================================
--- head/sys/dev/iser/icl_iser.h	(revision 300726)
+++ head/sys/dev/iser/icl_iser.h	(revision 300727)
@@ -1,547 +1,551 @@
 /* $FreeBSD$ */
 /*-
  * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */

 #ifndef ICL_ISER_H
 #define ICL_ISER_H

 /*
  * iSCSI Common Layer for RDMA.
  */

 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include

 #define ISER_DBG(X, ...) \
	do { \
		if (unlikely(iser_debug > 2)) \
			printf("DEBUG: %s: " X "\n", \
				__func__, ## __VA_ARGS__); \
	} while (0)

 #define ISER_INFO(X, ...) \
	do { \
		if (unlikely(iser_debug > 1)) \
			printf("INFO: %s: " X "\n", \
				__func__, ## __VA_ARGS__); \
	} while (0)

 #define ISER_WARN(X, ...) \
	do { \
		if (unlikely(iser_debug > 0)) { \
			printf("WARNING: %s: " X "\n", \
				__func__, ## __VA_ARGS__); \
		} \
	} while (0)

 #define ISER_ERR(X, ...) \
	printf("ERROR: %s: " X "\n", __func__, ## __VA_ARGS__)

 #define ISER_VER	0x10
 #define ISER_WSV	0x08
 #define ISER_RSV	0x04

 #define ISER_FASTREG_LI_WRID	0xffffffffffffffffULL
 #define ISER_BEACON_WRID	0xfffffffffffffffeULL

 #define SHIFT_4K	12
 #define SIZE_4K		(1ULL << SHIFT_4K)
 #define MASK_4K		(~(SIZE_4K-1))

 /* support up to 512KB in one RDMA */
 #define ISCSI_ISER_SG_TABLESIZE	(0x80000 >> SHIFT_4K)
 #define ISER_DEF_XMIT_CMDS_MAX	256

 /*
  * The max RX (recv) WR supported by the iSER QP is defined by
  * max_recv_wr = commands_max + recv_beacon
  */
 #define ISER_QP_MAX_RECV_DTOS	(ISER_DEF_XMIT_CMDS_MAX + 1)
 #define ISER_MIN_POSTED_RX	(ISER_DEF_XMIT_CMDS_MAX >> 2)

 /* QP settings */
 /* Maximal bounds on received asynchronous PDUs */
 #define ISER_MAX_RX_MISC_PDUS	4 /* NOOP_IN(2), ASYNC_EVENT(2) */
 #define ISER_MAX_TX_MISC_PDUS	6 /* NOOP_OUT(2), TEXT(1), SCSI_TMFUNC(2), LOGOUT(1) */

 /*
  * The max TX (send) WR supported by the iSER QP is defined by
  * max_send_wr = T * (1 + D) + C; D is how many inflight data-outs we expect
  * to have at most for a SCSI command.  The tx posting & completion handling
  * code supports an -EAGAIN scheme where tx is suspended until the QP has
  * room for more send WRs.  D=8 comes from 64K/8K.
  */
 #define ISER_INFLIGHT_DATAOUTS	8

 /* the send_beacon increases max_send_wr by 1 */
 #define ISER_QP_MAX_REQ_DTOS	(ISER_DEF_XMIT_CMDS_MAX * \
				 (1 + ISER_INFLIGHT_DATAOUTS) + \
				 ISER_MAX_TX_MISC_PDUS + \
				 ISER_MAX_RX_MISC_PDUS + 1)

 #define ISER_GET_MAX_XMIT_CMDS(send_wr) ((send_wr \
					  - ISER_MAX_TX_MISC_PDUS \
					  - ISER_MAX_RX_MISC_PDUS - 1) / \
					  (1 + ISER_INFLIGHT_DATAOUTS))

 #define ISER_WC_BATCH_COUNT	16
 #define ISER_SIGNAL_CMD_COUNT	32

 /*
  * Maximal QPs recommended per CQ.  With more QPs per CQ we might
  * encounter a CQ overrun state.
  */
 #define ISCSI_ISER_MAX_CONN	8
 #define ISER_MAX_RX_LEN		(ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
 #define ISER_MAX_TX_LEN		(ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
 #define ISER_MAX_CQ_LEN		(ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
				 ISCSI_ISER_MAX_CONN)

 #define ISER_ZBVA_NOT_SUPPORTED		0x80
 #define ISER_SEND_W_INV_NOT_SUPPORTED	0x40

+#define ISCSI_DEF_MAX_RECV_SEG_LEN	8192
+#define ISCSI_OPCODE_MASK	0x3f
+
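Plugging the defaults into the QP-depth formula above: T = 256 commands, D = 8 inflight data-outs, C = 10 misc PDUs, plus one beacon WR. A worked instance of ISER_QP_MAX_REQ_DTOS and its inverse, ISER_GET_MAX_XMIT_CMDS():

    #include <stdio.h>

    /* mirrors the QP-depth macros from icl_iser.h */
    #define ISER_DEF_XMIT_CMDS_MAX	256	/* T: max outstanding commands */
    #define ISER_INFLIGHT_DATAOUTS	8	/* D: inflight data-outs per command */
    #define ISER_MAX_TX_MISC_PDUS	6
    #define ISER_MAX_RX_MISC_PDUS	4

    int
    main(void)
    {
    	/* max_send_wr = T * (1 + D) + C, plus 1 for the send beacon */
    	int send_wr = ISER_DEF_XMIT_CMDS_MAX * (1 + ISER_INFLIGHT_DATAOUTS) +
    	    ISER_MAX_TX_MISC_PDUS + ISER_MAX_RX_MISC_PDUS + 1;
    	/* inverse mapping, as in ISER_GET_MAX_XMIT_CMDS() */
    	int max_cmds = (send_wr - ISER_MAX_TX_MISC_PDUS -
    	    ISER_MAX_RX_MISC_PDUS - 1) / (1 + ISER_INFLIGHT_DATAOUTS);

    	printf("max_send_wr = %d, max xmit cmds = %d\n", send_wr, max_cmds);
    	/* prints: max_send_wr = 2315, max xmit cmds = 256 */
    	return (0);
    }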
 #define icl_to_iser_conn(ic) \
	container_of(ic, struct iser_conn, icl_conn)
 #define icl_to_iser_pdu(ip) \
	container_of(ip, struct icl_iser_pdu, icl_pdu)

 /**
  * struct iser_hdr - iSER header
  *
  * @flags:      flags support (zbva, remote_inv)
  * @rsvd:       reserved
  * @write_stag: write rkey
  * @write_va:   write virtual address
  * @read_stag:  read rkey
  * @read_va:    read virtual address
  */
 struct iser_hdr {
	u8 flags;
	u8 rsvd[3];
	__be32 write_stag;
	__be64 write_va;
	__be32 read_stag;
	__be64 read_va;
 } __attribute__((packed));

 struct iser_cm_hdr {
	u8 flags;
	u8 rsvd[3];
 } __packed;

 /* Constant PDU lengths calculations */
 #define ISER_HEADERS_LEN	(sizeof(struct iser_hdr) + ISCSI_BHS_SIZE)

 #define ISER_RECV_DATA_SEG_LEN	128
 #define ISER_RX_PAYLOAD_SIZE	(ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
 #define ISER_RX_LOGIN_SIZE	(ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)

 enum iser_conn_state {
	ISER_CONN_INIT,		/* descriptor allocd, no conn          */
	ISER_CONN_PENDING,	/* in the process of being established */
	ISER_CONN_UP,		/* up and running                      */
	ISER_CONN_TERMINATING,	/* in the process of being terminated  */
	ISER_CONN_DOWN,		/* shut down                           */
	ISER_CONN_STATES_NUM
 };

 enum iser_task_status {
	ISER_TASK_STATUS_INIT = 0,
	ISER_TASK_STATUS_STARTED,
	ISER_TASK_STATUS_COMPLETED
 };

 enum iser_data_dir {
	ISER_DIR_IN = 0,	/* to initiator */
	ISER_DIR_OUT,		/* from initiator */
	ISER_DIRS_NUM
 };

 /**
  * struct iser_mem_reg - iSER memory registration info
  *
  * @sge:   memory region sg element
  * @rkey:  memory region remote key
  * @mem_h: pointer to registration context (FMR/Fastreg)
  */
 struct iser_mem_reg {
	struct ib_sge sge;
	u32 rkey;
	void *mem_h;
 };

 enum iser_desc_type {
	ISCSI_TX_CONTROL,
	ISCSI_TX_SCSI_COMMAND,
	ISCSI_TX_DATAOUT
 };

 /**
  * struct iser_data_buf - iSER data buffer
  *
  * @sg:        pointer to the sg list
  * @size:      num entries of this sg
  * @data_len:  total buffer byte len
  * @dma_nents: returned by dma_map_sg
  * @copy_buf:  allocated copy buf for SGs unaligned
  *             for rdma which are copied
  * @orig_sg:   pointer to the original sg list (in case
  *             we used a copy)
  * @sg_single: SG-ified clone of a non SG SC or
  *             unaligned SG
  */
 struct iser_data_buf {
	struct scatterlist sgl[ISCSI_ISER_SG_TABLESIZE];
	void *sg;
	unsigned int size;
	unsigned long data_len;
	unsigned int dma_nents;
	char *copy_buf;
	struct scatterlist *orig_sg;
	struct scatterlist sg_single;
 };

 /* fwd declarations */
 struct iser_conn;
 struct ib_conn;
 struct iser_device;

 /**
  * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
  *
  * @iser_header:  iser header
  * @iscsi_header: iscsi header (bhs)
  * @type:         command/control/dataout
  * @dma_addr:     header buffer dma_address
  * @tx_sg:        sg[0] points to iser/iscsi headers;
  *                sg[1] optionally points to either immediate data,
  *                unsolicited data-out or control
  * @num_sge:      number of sges used on this TX task
  * @mapped:       indicates if the descriptor is dma mapped
  */
 struct iser_tx_desc {
	struct iser_hdr iser_header;
	struct iscsi_bhs iscsi_header __attribute__((packed));
	enum iser_desc_type type;
	u64 dma_addr;
	struct ib_sge tx_sg[2];
	int num_sge;
	bool mapped;
 };

 #define ISER_RX_PAD_SIZE	(256 - (ISER_RX_PAYLOAD_SIZE + \
				 sizeof(u64) + sizeof(struct ib_sge)))
 /**
  * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
  *
  * @iser_header:  iser header
  * @iscsi_header: iscsi header
  * @data:         received data segment
  * @dma_addr:     receive buffer dma address
  * @rx_sg:        ib_sge of receive buffer
  * @pad:          for sense data TODO: Modify to maximum sense length supported
  */
 struct iser_rx_desc {
	struct iser_hdr iser_header;
	struct iscsi_bhs iscsi_header;
	char data[ISER_RECV_DATA_SEG_LEN];
	u64 dma_addr;
	struct ib_sge rx_sg;
	char pad[ISER_RX_PAD_SIZE];
 } __attribute__((packed));

 struct icl_iser_pdu {
	struct icl_pdu icl_pdu;
	struct iser_tx_desc desc;
	struct iser_conn *iser_conn;
	enum iser_task_status status;
	struct ccb_scsiio *csio;
	int command_sent;
	int dir[ISER_DIRS_NUM];
	struct iser_mem_reg rdma_reg[ISER_DIRS_NUM];
	struct iser_data_buf data[ISER_DIRS_NUM];
 };

 /**
  * struct iser_comp - iSER completion context
  *
  * @device:     pointer to device handle
  * @cq:         completion queue
  * @wcs:        work completion array
  * @tq:         taskqueue handle
  * @task:       task to run task_fn
  * @active_qps: Number of active QPs attached
  *              to completion context
  */
 struct iser_comp {
	struct iser_device *device;
	struct ib_cq *cq;
	struct ib_wc wcs[ISER_WC_BATCH_COUNT];
	struct taskqueue *tq;
	struct task task;
	int active_qps;
 };

 /**
  * struct iser_device - iSER device handle
  *
  * @ib_device:     RDMA device
  * @pd:            Protection Domain for this device
  * @dev_attr:      Device attributes container
  * @mr:            Global DMA memory region
  * @event_handler: IB events handle routine
  * @ig_list:       entry in devices list
  * @refcount:      Reference counter, dominated by open iser connections
  * @comps_used:    Number of completion contexts used, min between online
  *                 cpus and device max completion vectors
  * @comps:         Dynamically allocated array of completion handlers
  */
 struct iser_device {
	struct ib_device *ib_device;
	struct ib_pd *pd;
	struct ib_device_attr dev_attr;
	struct ib_mr *mr;
	struct ib_event_handler event_handler;
	struct list_head ig_list;
	int refcount;
	int comps_used;
	struct iser_comp *comps;
 };

 /**
  * struct iser_reg_resources - Fast registration resources
  *
  * @mr:       memory region
  * @frpl:     fast reg page list
  * @mr_valid: is mr valid indicator
  */
 struct iser_reg_resources {
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *frpl;
	u8 mr_valid:1;
 };

 /**
  * struct fast_reg_descriptor - Fast registration descriptor
  *
  * @list: entry in connection fastreg pool
  * @rsc:  data buffer registration resources
  */
 struct fast_reg_descriptor {
	struct list_head list;
	struct iser_reg_resources rsc;
 };

 /**
  * struct iser_beacon - beacon to signal all flush errors were drained
  *
  * @send:       send wr
  * @recv:       recv wr
  * @flush_lock: protects flush_cv
  * @flush_cv:   condition variable for beacon flush
  */
 struct iser_beacon {
	union {
		struct ib_send_wr send;
		struct ib_recv_wr recv;
	};
	struct mtx flush_lock;
	struct cv flush_cv;
 };
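ISER_RX_PAD_SIZE above rounds each receive descriptor up to a 256-byte slot: the receive payload (iSER header, BHS, and the 128-byte data segment) plus the trailing dma_addr and ib_sge bookkeeping fields. A worked instance of that arithmetic, assuming the usual 28-byte packed iser_hdr, a 48-byte iSCSI BHS, and a 16-byte ib_sge (addr/length/lkey); actual sizes depend on the ABI:

    #include <stdio.h>

    int
    main(void)
    {
    	int iser_hdr = 28;	/* packed struct iser_hdr */
    	int iscsi_bhs = 48;	/* ISCSI_BHS_SIZE */
    	int data_seg = 128;	/* ISER_RECV_DATA_SEG_LEN */
    	int dma_addr = 8;	/* u64 */
    	int ib_sge = 16;	/* assumed: u64 addr + u32 length + u32 lkey */

    	int payload = iser_hdr + iscsi_bhs + data_seg;	/* ISER_RX_PAYLOAD_SIZE */
    	int pad = 256 - (payload + dma_addr + ib_sge);	/* ISER_RX_PAD_SIZE */

    	printf("payload = %d, pad = %d, slot = %d\n",
    	    payload, pad, payload + dma_addr + ib_sge + pad);	/* 204, 28, 256 */
    	return (0);
    }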
 /**
  * struct ib_conn - Infiniband related objects
  *
  * @cma_id: rdma_cm connection manager handle
  * @qp:     Connection Queue-pair
  * @device: reference to iser device
  * @comp:   iser completion context
  */
 struct ib_conn {
	struct rdma_cm_id *cma_id;
	struct ib_qp *qp;
	int post_recv_buf_count;
	u8 sig_count;
	struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
	struct iser_device *device;
	struct iser_comp *comp;
	struct iser_beacon beacon;
	struct mtx lock;
	union {
		struct {
			struct ib_fmr_pool *pool;
			struct iser_page_vec *page_vec;
		} fmr;
		struct {
			struct list_head pool;
			int pool_size;
		} fastreg;
	};
 };

 struct iser_conn {
	struct icl_conn icl_conn;
	struct ib_conn ib_conn;
	struct cv up_cv;
	struct list_head conn_list;
	struct sx state_mutex;
	enum iser_conn_state state;
	int qp_max_recv_dtos;
	int min_posted_rx;
	u16 max_cmds;
	char *login_buf;
	char *login_req_buf, *login_resp_buf;
	u64 login_req_dma, login_resp_dma;
	unsigned int rx_desc_head;
	struct iser_rx_desc *rx_descs;
	u32 num_rx_descs;
+	bool handoff_done;
 };

 /**
  * struct iser_global: iSER global context
  *
  * @device_list_mutex: protects device_list
  * @device_list:       iser devices global list
  * @connlist_mutex:    protects connlist
  * @connlist:          iser connections global list
  * @desc_cache:        kmem cache for tx dataout
  * @close_conns_mutex: serializes conns closure
  */
 struct iser_global {
	struct sx device_list_mutex;
	struct list_head device_list;
	struct mtx connlist_mutex;
	struct list_head connlist;
	struct sx close_conns_mutex;
 };

 extern struct iser_global ig;
 extern int iser_debug;

 void iser_create_send_desc(struct iser_conn *, struct iser_tx_desc *);
 int iser_post_recvl(struct iser_conn *);
 int iser_post_recvm(struct iser_conn *, int);
 int iser_alloc_login_buf(struct iser_conn *iser_conn);
 void iser_free_login_buf(struct iser_conn *iser_conn);
 int iser_post_send(struct ib_conn *, struct iser_tx_desc *, bool);
 void iser_snd_completion(struct iser_tx_desc *, struct ib_conn *);
 void iser_rcv_completion(struct iser_rx_desc *, unsigned long, struct ib_conn *);
 void iser_pdu_free(struct icl_conn *, struct icl_pdu *);
 struct icl_pdu * iser_new_pdu(struct icl_conn *ic, int flags);
 int iser_alloc_rx_descriptors(struct iser_conn *, int);
 void iser_free_rx_descriptors(struct iser_conn *);
 int iser_initialize_headers(struct icl_iser_pdu *, struct iser_conn *);
 int iser_send_control(struct iser_conn *, struct icl_iser_pdu *);
 int iser_send_command(struct iser_conn *, struct icl_iser_pdu *);
 int iser_reg_rdma_mem(struct icl_iser_pdu *, enum iser_data_dir);
 void iser_unreg_rdma_mem(struct icl_iser_pdu *, enum iser_data_dir);
 int iser_create_fastreg_pool(struct ib_conn *, unsigned);
 void iser_free_fastreg_pool(struct ib_conn *);
 int iser_dma_map_task_data(struct icl_iser_pdu *,
			    struct iser_data_buf *, enum iser_data_dir,
			    enum dma_data_direction);
 int iser_conn_terminate(struct iser_conn *);
 void iser_free_ib_conn_res(struct iser_conn *, bool);
 void iser_dma_unmap_task_data(struct icl_iser_pdu *, struct iser_data_buf *,
			       enum dma_data_direction);
 int iser_cma_handler(struct rdma_cm_id *, struct rdma_cm_event *);

 #endif /* !ICL_ISER_H */
Index: head/sys/dev/iser/iser_initiator.c
===================================================================
--- head/sys/dev/iser/iser_initiator.c	(revision 300726)
+++ head/sys/dev/iser/iser_initiator.c	(revision 300727)
@@ -1,539 +1,538 @@
 /* $FreeBSD$ */
 /*-
  * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */

 #include "icl_iser.h"

 static MALLOC_DEFINE(M_ISER_INITIATOR, "iser_initiator", "iser initiator backend");

 /*
  * Register user buffer memory and initialize passive rdma
  * dto descriptor.  Data size is stored in
  * task->data[ISER_DIR_IN].data_len; protection size
  * is stored in task->prot[ISER_DIR_IN].data_len.
  */
 static int
 iser_prepare_read_cmd(struct icl_iser_pdu *iser_pdu)
 {
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_pdu->data[ISER_DIR_IN];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu, buf_in, ISER_DIR_IN,
				     DMA_FROM_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_IN);
	if (err) {
		ISER_ERR("Failed to set up Data-IN RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_IN];
	hdr->flags |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(mem_reg->rkey);
	hdr->read_va = cpu_to_be64(mem_reg->sge.addr);

	return (0);
 }
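The read path above advertises the registered buffer in the iSER header: ISER_RSV marks the read STag/VA as valid so the target can RDMA-write the data in. iser_create_send_desc() (below) first seeds the flags byte with the version bits; the prepare routines then OR in the direction bit. A small sketch of how those flag values (from icl_iser.h) combine:

    #include <stdio.h>
    #include <stdint.h>

    #define ISER_VER	0x10	/* version bits, from icl_iser.h */
    #define ISER_WSV	0x08	/* write STag/VA valid */
    #define ISER_RSV	0x04	/* read STag/VA valid */

    int
    main(void)
    {
    	uint8_t flags = ISER_VER;	/* set by iser_create_send_desc() */

    	flags |= ISER_RSV;		/* READ: target RDMA-writes to us */
    	printf("read cmd flags = 0x%02x\n", flags);	/* 0x14 */

    	flags = ISER_VER | ISER_WSV;	/* WRITE: target RDMA-reads from us */
    	printf("write cmd flags = 0x%02x\n", flags);	/* 0x18 */
    	return (0);
    }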
 /*
  * Register user buffer memory and initialize passive rdma
  * dto descriptor.  Data size is stored in
  * task->data[ISER_DIR_OUT].data_len; protection size
  * is stored in task->prot[ISER_DIR_OUT].data_len.
  */
 static int
 iser_prepare_write_cmd(struct icl_iser_pdu *iser_pdu)
 {
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_pdu->data[ISER_DIR_OUT];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu, buf_out, ISER_DIR_OUT,
				     DMA_TO_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_OUT);
	if (err) {
		ISER_ERR("Failed to set up Data-out RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_OUT];
	hdr->flags |= ISER_WSV;
	hdr->write_stag = cpu_to_be32(mem_reg->rkey);
	hdr->write_va = cpu_to_be64(mem_reg->sge.addr);

	return (0);
 }

 /* creates a new tx descriptor and adds header regd buffer */
 void
 iser_create_send_desc(struct iser_conn *iser_conn,
		      struct iser_tx_desc *tx_desc)
 {
	struct iser_device *device = iser_conn->ib_conn.device;

	ib_dma_sync_single_for_cpu(device->ib_device,
		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;
	tx_desc->num_sge = 1;

	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
		tx_desc->tx_sg[0].lkey = device->mr->lkey;
		ISER_DBG("sdesc %p lkey mismatch, fixing", tx_desc);
	}
 }

 void
 iser_free_login_buf(struct iser_conn *iser_conn)
 {
	struct iser_device *device = iser_conn->ib_conn.device;

	if (!iser_conn->login_buf)
		return;

	if (iser_conn->login_req_dma)
		ib_dma_unmap_single(device->ib_device,
				    iser_conn->login_req_dma,
				    ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

	if (iser_conn->login_resp_dma)
		ib_dma_unmap_single(device->ib_device,
				    iser_conn->login_resp_dma,
				    ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->login_buf, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->login_req_dma = 0;
	iser_conn->login_resp_dma = 0;
	iser_conn->login_buf = NULL;
 }

 int
 iser_alloc_login_buf(struct iser_conn *iser_conn)
 {
	struct iser_device *device = iser_conn->ib_conn.device;
	int req_err, resp_err;

	BUG_ON(device == NULL);

	iser_conn->login_buf = malloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
				      ISER_RX_LOGIN_SIZE,
				      M_ISER_INITIATOR, M_WAITOK | M_ZERO);
	if (!iser_conn->login_buf)
		goto out_err;

	iser_conn->login_req_buf = iser_conn->login_buf;
	iser_conn->login_resp_buf = iser_conn->login_buf +
				    ISCSI_DEF_MAX_RECV_SEG_LEN;

	iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
						     iser_conn->login_req_buf,
						     ISCSI_DEF_MAX_RECV_SEG_LEN,
						     DMA_TO_DEVICE);
	iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
						      iser_conn->login_resp_buf,
						      ISER_RX_LOGIN_SIZE,
						      DMA_FROM_DEVICE);

	req_err = ib_dma_mapping_error(device->ib_device,
				       iser_conn->login_req_dma);
	resp_err = ib_dma_mapping_error(device->ib_device,
					iser_conn->login_resp_dma);

	if (req_err || resp_err) {
		if (req_err)
			iser_conn->login_req_dma = 0;
		if (resp_err)
			iser_conn->login_resp_dma = 0;
		goto free_login_buf;
	}

	return (0);

free_login_buf:
	iser_free_login_buf(iser_conn);
out_err:
	ISER_DBG("unable to alloc or map login buf");
	return (ENOMEM);
 }
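iser_alloc_login_buf() carves one allocation into two regions: the request half (ISCSI_DEF_MAX_RECV_SEG_LEN bytes, DMA-mapped to-device) and the response half (ISER_RX_LOGIN_SIZE bytes, mapped from-device). A userspace sketch of that layout; the 76-byte value for ISER_HEADERS_LEN assumes the usual 28-byte iser_hdr plus 48-byte BHS:

    #include <stdio.h>
    #include <stdlib.h>

    #define ISCSI_DEF_MAX_RECV_SEG_LEN	8192		/* request half */
    #define ISER_RX_LOGIN_SIZE		(76 + 8192)	/* assumed headers + seg */

    int
    main(void)
    {
    	/* one allocation carved into request and response regions */
    	char *login_buf = calloc(1,
    	    ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE);
    	char *login_req_buf = login_buf;
    	char *login_resp_buf = login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;

    	printf("req at +%td, resp at +%td\n",
    	    login_req_buf - login_buf, login_resp_buf - login_buf);
    	free(login_buf);
    	return (0);
    }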
 int
 iser_alloc_rx_descriptors(struct iser_conn *iser_conn, int cmds_max)
 {
	int i, j;
	u64 dma_addr;
	struct iser_rx_desc *rx_desc;
	struct ib_sge *rx_sg;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_conn->qp_max_recv_dtos = cmds_max;
	iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;

	if (iser_create_fastreg_pool(ib_conn, cmds_max))
		goto create_rdma_reg_res_failed;

	iser_conn->num_rx_descs = cmds_max;
	iser_conn->rx_descs = malloc(iser_conn->num_rx_descs *
				     sizeof(struct iser_rx_desc),
				     M_ISER_INITIATOR, M_WAITOK | M_ZERO);
	if (!iser_conn->rx_descs)
		goto rx_desc_alloc_fail;

	rx_desc = iser_conn->rx_descs;

	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
		dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
					ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, dma_addr))
			goto rx_desc_dma_map_failed;

		rx_desc->dma_addr = dma_addr;

		rx_sg = &rx_desc->rx_sg;
		rx_sg->addr = rx_desc->dma_addr;
		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
		rx_sg->lkey = device->mr->lkey;
	}

	iser_conn->rx_desc_head = 0;

	return (0);

rx_desc_dma_map_failed:
	rx_desc = iser_conn->rx_descs;
	for (j = 0; j < i; j++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	free(iser_conn->rx_descs, M_ISER_INITIATOR);
	iser_conn->rx_descs = NULL;
rx_desc_alloc_fail:
	iser_free_fastreg_pool(ib_conn);
create_rdma_reg_res_failed:
	ISER_ERR("failed allocating rx descriptors / data buffers");

	return (ENOMEM);
 }

 void
 iser_free_rx_descriptors(struct iser_conn *iser_conn)
 {
	int i;
	struct iser_rx_desc *rx_desc;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_free_fastreg_pool(ib_conn);

	rx_desc = iser_conn->rx_descs;
	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->rx_descs, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->rx_descs = NULL;
 }

 static void
 iser_buf_to_sg(void *buf, struct iser_data_buf *data_buf)
 {
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = data_buf->data_len;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		offset = ((uintptr_t)buf) & ~PAGE_MASK;
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_buf(sg, buf, len);
		buf = (void *)(((u64)buf) + (u64)len);
	}

	data_buf->size = i;
	sg_mark_end(sg);
 }
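iser_buf_to_sg() splits a KVA buffer into page-bounded scatter/gather chunks: the first chunk is trimmed to the end of its page, all further chunks are page sized (in the kernel code, `& ~PAGE_MASK` is equivalent to masking with PAGE_SIZE - 1). A runnable userspace sketch of the same loop, with a hypothetical unaligned address:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    #define PAGE_SIZE	4096UL

    static size_t
    min_sz(size_t a, size_t b)
    {
    	return (a < b ? a : b);
    }

    int
    main(void)
    {
    	uintptr_t buf = 0x1000f00;	/* hypothetical KVA, not page aligned */
    	size_t tlen = 10000;		/* transfer length */
    	size_t len, offset;
    	int i;

    	/* same shape as iser_buf_to_sg(): first chunk trimmed to its page */
    	for (i = 0; tlen > 0; i++, tlen -= len, buf += len) {
    		offset = buf & (PAGE_SIZE - 1);
    		len = min_sz(PAGE_SIZE - offset, tlen);
    		printf("sg[%d]: addr=0x%lx len=%zu\n",
    		    i, (unsigned long)buf, len);
    	}
    	printf("%d sg entries\n", i);	/* 256 + 4096 + 4096 + 1552 -> 4 */
    	return (0);
    }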
 static void
 iser_bio_to_sg(struct bio *bp, struct iser_data_buf *data_buf)
 {
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = bp->bio_bcount;
	offset = bp->bio_ma_offset;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_page(sg, bp->bio_ma[i], len, offset);
		offset = 0;
	}

	data_buf->size = i;
	sg_mark_end(sg);
 }

 static int
 iser_csio_to_sg(struct ccb_scsiio *csio, struct iser_data_buf *data_buf)
 {
	struct ccb_hdr *ccbh;
	int err = 0;

	ccbh = &csio->ccb_h;
	switch ((ccbh->flags & CAM_DATA_MASK)) {
	case CAM_DATA_BIO:
		iser_bio_to_sg((struct bio *)csio->data_ptr, data_buf);
		break;
	case CAM_DATA_VADDR:
		/*
		 * Support KVA buffers for various scsi commands such as:
		 *  - REPORT_LUNS
		 *  - MODE_SENSE_6
		 *  - INQUIRY
		 *  - SERVICE_ACTION_IN.
		 * The data of these commands is always mapped into KVA.
		 */
		iser_buf_to_sg(csio->data_ptr, data_buf);
		break;
	default:
		ISER_ERR("flags 0x%X unimplemented", ccbh->flags);
		err = EINVAL;
	}
	return (err);
 }

 static inline bool
 iser_signal_comp(u8 sig_count)
 {
	return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
 }

 int
 iser_send_command(struct iser_conn *iser_conn, struct icl_iser_pdu *iser_pdu)
 {
	struct iser_data_buf *data_buf;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;
	struct iscsi_bhs_scsi_command *hdr = (struct iscsi_bhs_scsi_command *)
					     &(iser_pdu->desc.iscsi_header);
	struct ccb_scsiio *csio = iser_pdu->csio;
	int err = 0;
	u8 sig_count = ++iser_conn->ib_conn.sig_count;

	/* build the tx desc regd header and add it to the tx desc dto */
	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
	iser_create_send_desc(iser_conn, tx_desc);

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		data_buf = &iser_pdu->data[ISER_DIR_IN];
	} else {
		data_buf = &iser_pdu->data[ISER_DIR_OUT];
	}

	data_buf->sg = csio->data_ptr;
	data_buf->data_len = csio->dxfer_len;

	if (likely(csio->dxfer_len)) {
		err = iser_csio_to_sg(csio, data_buf);
		if (unlikely(err))
			goto send_command_error;
	}

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		err = iser_prepare_read_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	} else if (hdr->bhssc_flags & BHSSC_FLAGS_W) {
		err = iser_prepare_write_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, tx_desc,
			     iser_signal_comp(sig_count));
	if (!err)
		return (0);

send_command_error:
	ISER_ERR("iser_conn %p itt %u len %u err %d", iser_conn,
		 hdr->bhssc_initiator_task_tag,
		 hdr->bhssc_expected_data_transfer_length, err);
	return (err);
 }

 int
 iser_send_control(struct iser_conn *iser_conn, struct icl_iser_pdu *iser_pdu)
 {
	struct iser_tx_desc *mdesc;
	struct iser_device *device;
	size_t datalen = iser_pdu->icl_pdu.ip_data_len;
-	struct icl_conn *ic = &iser_conn->icl_conn;
	int err;

	mdesc = &iser_pdu->desc;

	/* build the tx desc regd header and add it to the tx desc dto */
	mdesc->type = ISCSI_TX_CONTROL;
	iser_create_send_desc(iser_conn, mdesc);

	device = iser_conn->ib_conn.device;

	if (datalen > 0) {
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
		ib_dma_sync_single_for_cpu(device->ib_device,
					   iser_conn->login_req_dma, datalen,
					   DMA_TO_DEVICE);
		ib_dma_sync_single_for_device(device->ib_device,
					      iser_conn->login_req_dma, datalen,
					      DMA_TO_DEVICE);
		tx_dsg->addr = iser_conn->login_req_dma;
		tx_dsg->length = datalen;
		tx_dsg->lkey = device->mr->lkey;
		mdesc->num_sge = 2;
	}

-	/* For discovery session we re-use the login buffer */
-	if (ic->ic_session_login_phase(ic) || ic->ic_session_type_discovery(ic)) {
+	/* For the login phase and discovery sessions we re-use the login buffer */
+	if (!iser_conn->handoff_done) {
		err = iser_post_recvl(iser_conn);
		if (err)
			goto send_control_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
	if (!err)
		return (0);

send_control_error:
	ISER_ERR("conn %p failed err %d", iser_conn, err);
	return (err);
 }
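iser_send_command() only requests a send completion on every ISER_SIGNAL_CMD_COUNT-th post (iser_signal_comp() above), batching completion interrupts. A sketch of how that predicate behaves over a run of sends, including the u8 wrap of sig_count:

    #include <stdio.h>
    #include <stdint.h>

    #define ISER_SIGNAL_CMD_COUNT	32	/* from icl_iser.h */

    /* same predicate as iser_signal_comp() */
    static int
    signal_comp(uint8_t sig_count)
    {
    	return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
    }

    int
    main(void)
    {
    	uint8_t sig_count = 0;
    	int sends, signaled = 0;

    	/* count how many of 256 consecutive sends request a completion */
    	for (sends = 0; sends < 256; sends++)
    		if (signal_comp(++sig_count))
    			signaled++;
    	printf("%d of %d sends signaled\n", signaled, sends);	/* 8 of 256 */
    	return (0);
    }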
 /**
  * iser_rcv_completion - recv DTO completion
  */
 void
 iser_rcv_completion(struct iser_rx_desc *rx_desc, unsigned long rx_xfer_len,
		     struct ib_conn *ib_conn)
 {
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
						   ib_conn);
	struct icl_conn *ic = &iser_conn->icl_conn;
	struct icl_pdu *response;
	struct iscsi_bhs *hdr;
	u64 rx_dma;
	int rx_buflen;
	int outstanding, count, err;

	/* differentiate between the login PDU and all other PDUs */
	if ((char *)rx_desc == iser_conn->login_resp_buf) {
		rx_dma = iser_conn->login_resp_dma;
		rx_buflen = ISER_RX_LOGIN_SIZE;
	} else {
		rx_dma = rx_desc->dma_addr;
		rx_buflen = ISER_RX_PAYLOAD_SIZE;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
				   rx_buflen, DMA_FROM_DEVICE);

	hdr = &rx_desc->iscsi_header;

	response = iser_new_pdu(ic, M_NOWAIT);
	response->ip_bhs = hdr;
	response->ip_data_len = rx_xfer_len - ISER_HEADERS_LEN;

	/*
	 * In case we got data in the receive buffer, assign the ip_data_mbuf
	 * to the rx_buffer - later we'll copy it to upper layer buffers
	 */
	if (response->ip_data_len)
		response->ip_data_mbuf = (struct mbuf *)(rx_desc->data);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
				      rx_buflen, DMA_FROM_DEVICE);

	/*
	 * Decrementing conn->post_recv_buf_count only --after-- freeing the
	 * task eliminates the need to worry about tasks which are completed
	 * in parallel to the execution of iser_conn_term.  So the code that
	 * waits for the posted rx bufs refcount to become zero handles
	 * everything.
	 */
	ib_conn->post_recv_buf_count--;

	if (rx_dma == iser_conn->login_resp_dma)
		goto receive;

	outstanding = ib_conn->post_recv_buf_count;
	if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
		count = min(iser_conn->qp_max_recv_dtos - outstanding,
			    iser_conn->min_posted_rx);
		err = iser_post_recvm(iser_conn, count);
		if (err)
			ISER_ERR("posting %d rx bufs err %d", count, err);
	}

receive:
	(ic->ic_receive)(response);
 }

 void
 iser_snd_completion(struct iser_tx_desc *tx_desc,
		     struct ib_conn *ib_conn)
 {
	struct icl_iser_pdu *iser_pdu = container_of(tx_desc,
						     struct icl_iser_pdu, desc);
	struct iser_conn *iser_conn = iser_pdu->iser_conn;

	if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL)
		iser_pdu_free(&iser_conn->icl_conn, &iser_pdu->icl_pdu);
 }
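The tail of iser_rcv_completion() implements simple receive-ring flow control: once the number of still-posted buffers drops far enough below the ring size, it reposts up to min_posted_rx buffers at once. A standalone sketch of that decision with hypothetical counts (ring of 256, low-water mark 64):

    #include <stdio.h>

    static int
    min_int(int a, int b)
    {
    	return (a < b ? a : b);
    }

    int
    main(void)
    {
    	int qp_max_recv_dtos = 256;	/* ring size (from ic_maxtags) */
    	int min_posted_rx = 64;		/* low-water mark (ring / 4) */
    	int outstanding = 190;		/* hypothetical rx buffers still posted */
    	int count;

    	/* same test as the end of iser_rcv_completion() */
    	if (outstanding + min_posted_rx <= qp_max_recv_dtos) {
    		count = min_int(qp_max_recv_dtos - outstanding, min_posted_rx);
    		printf("repost %d rx buffers\n", count);	/* repost 64 */
    	} else
    		printf("ring full enough, no repost\n");
    	return (0);
    }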