Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h @@ -174,4 +174,5 @@ } void iwch_ev_dispatch(struct iwch_dev *, struct mbuf *); +void process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so); #endif Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h @@ -231,8 +231,8 @@ /* CM prototypes */ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); -int iwch_create_listen(struct iw_cm_id *cm_id, int backlog); -int iwch_destroy_listen(struct iw_cm_id *cm_id); +int iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog); +void iwch_destroy_listen_ep(struct iw_cm_id *cm_id); int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags); Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c @@ -259,7 +259,6 @@ void __free_ep(struct iwch_ep_common *epc) { CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]); - KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so)); KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc)); free(epc, M_DEVBUF); } @@ -1366,7 +1365,7 @@ } int -iwch_create_listen(struct iw_cm_id *cm_id, int backlog) +iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog) { int err = 0; struct iwch_listen_ep *ep; @@ -1386,35 +1385,22 @@ state_set(&ep->com, LISTEN); ep->com.so = cm_id->so; - err = init_sock(&ep->com); - if (err) - goto fail; - - err = solisten(ep->com.so, ep->backlog, ep->com.thread); - if (!err) { - cm_id->provider_data = ep; - goto out; - } - close_socket(&ep->com, 0); -fail: - cm_id->rem_ref(cm_id); - put_ep(&ep->com); + cm_id->provider_data = ep; out: return err; } -int -iwch_destroy_listen(struct iw_cm_id *cm_id) +void +iwch_destroy_listen_ep(struct iw_cm_id *cm_id) { struct iwch_listen_ep *ep = to_listen_ep(cm_id); CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); state_set(&ep->com, DEAD); - close_socket(&ep->com, 0); cm_id->rem_ref(cm_id); put_ep(&ep->com); - return 0; + return; } int @@ -1531,54 +1517,32 @@ } } -static struct socket * -dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep) -{ - struct socket *so; - - ACCEPT_LOCK(); - so = TAILQ_FIRST(&head->so_comp); - if (!so) { - ACCEPT_UNLOCK(); - return NULL; - } - TAILQ_REMOVE(&head->so_comp, so, so_list); - head->so_qlen--; - SOCK_LOCK(so); - so->so_qstate &= ~SQ_COMP; - so->so_head = NULL; - soref(so); - soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep); - so->so_state |= SS_NBIO; - PANIC_IF(!(so->so_state & SS_ISCONNECTED)); - PANIC_IF(so->so_error); - SOCK_UNLOCK(so); - ACCEPT_UNLOCK(); - soaccept(so, (struct sockaddr **)remote); - return so; -} - -static void -process_newconn(struct iwch_ep *parent_ep) +void +process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so) { - struct socket *child_so; struct iwch_ep *child_ep; + struct sockaddr_in *local; struct sockaddr_in *remote; + struct iwch_ep *parent_ep = parent_cm_id->provider_data; CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so); + if (!child_so) { + log(LOG_ERR, "%s - invalid child socket!\n", __func__); + return; + } child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); if (!child_ep) { log(LOG_ERR, "%s - failed to allocate ep entry!\n", __FUNCTION__); return; } - child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); - if (!child_so) { - log(LOG_ERR, "%s - failed to dequeue child socket!\n", - __FUNCTION__); - __free_ep(&child_ep->com); - return; - } + SOCKBUF_LOCK(&child_so->so_rcv); + soupcall_set(child_so, SO_RCV, iwch_so_upcall, child_ep); + SOCKBUF_UNLOCK(&child_so->so_rcv); + + in_getsockaddr(child_so, (struct sockaddr **)&local); + in_getpeeraddr(child_so, (struct sockaddr **)&remote); + CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__, inet_ntoa(remote->sin_addr), ntohs(remote->sin_port)); child_ep->com.tdev = parent_ep->com.tdev; @@ -1595,9 +1559,9 @@ child_ep->com.thread = parent_ep->com.thread; child_ep->parent_ep = parent_ep; + free(local, M_SONAME); free(remote, M_SONAME); get_ep(&parent_ep->com); - child_ep->parent_ep = parent_ep; callout_init(&child_ep->timer, 1); state_set(&child_ep->com, MPA_REQ_WAIT); start_ep_timer(child_ep); @@ -1635,7 +1599,10 @@ } if (state == LISTEN) { - process_newconn(ep); + /* socket listening events are handled at IWCM */ + CTR3(KTR_IW_CXGB, "%s Invalid ep state:%u, ep:%p", __func__, + ep->com.state, ep); + BUG(); return; } Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c @@ -1140,8 +1140,9 @@ dev->ibdev.iwcm->connect = iwch_connect; dev->ibdev.iwcm->accept = iwch_accept_cr; dev->ibdev.iwcm->reject = iwch_reject_cr; - dev->ibdev.iwcm->create_listen = iwch_create_listen; - dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen; + dev->ibdev.iwcm->create_listen_ep = iwch_create_listen_ep; + dev->ibdev.iwcm->destroy_listen_ep = iwch_destroy_listen_ep; + dev->ibdev.iwcm->newconn = process_newconn; dev->ibdev.iwcm->add_ref = iwch_qp_add_ref; dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref; dev->ibdev.iwcm->get_qp = iwch_get_qp; Index: head/sys/dev/cxgbe/iw_cxgbe/cm.c =================================================================== --- head/sys/dev/cxgbe/iw_cxgbe/cm.c +++ head/sys/dev/cxgbe/iw_cxgbe/cm.c @@ -110,8 +110,6 @@ static void init_sock(struct c4iw_ep_common *epc); static void process_data(struct c4iw_ep *ep); static void process_connected(struct c4iw_ep *ep); -static struct socket * dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct c4iw_ep *child_ep); -static void process_newconn(struct c4iw_ep *parent_ep); static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag); static void process_socket_event(struct c4iw_ep *ep); static void release_ep_resources(struct c4iw_ep *ep); @@ -624,40 +622,21 @@ } } -static struct socket * -dequeue_socket(struct socket *head, struct sockaddr_in **remote, - struct c4iw_ep *child_ep) -{ - struct socket *so; - - ACCEPT_LOCK(); - so = TAILQ_FIRST(&head->so_comp); - if (!so) { - ACCEPT_UNLOCK(); - return (NULL); - } - TAILQ_REMOVE(&head->so_comp, so, so_list); - head->so_qlen--; - SOCK_LOCK(so); - so->so_qstate &= ~SQ_COMP; - so->so_head = NULL; - soref(so); - soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep); - so->so_state |= SS_NBIO; - SOCK_UNLOCK(so); - ACCEPT_UNLOCK(); - soaccept(so, (struct sockaddr **)remote); - - return (so); -} - -static void -process_newconn(struct c4iw_ep *parent_ep) +void +process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so) { - struct socket *child_so; struct c4iw_ep *child_ep; + struct sockaddr_in *local; struct sockaddr_in *remote; + struct c4iw_ep *parent_ep = parent_cm_id->provider_data; + if (!child_so) { + CTR4(KTR_IW_CXGBE, + "%s: parent so %p, parent ep %p, child so %p, invalid so", + __func__, parent_ep->com.so, parent_ep, child_so); + log(LOG_ERR, "%s: invalid child socket\n", __func__); + return; + } child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); if (!child_ep) { CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM", @@ -665,23 +644,18 @@ log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__); return; } - - child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); - if (!child_so) { - CTR4(KTR_IW_CXGBE, - "%s: parent so %p, parent ep %p, child ep %p, dequeue err", - __func__, parent_ep->com.so, parent_ep, child_ep); - log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__); - __free_ep(&child_ep->com); - return; - - } + SOCKBUF_LOCK(&child_so->so_rcv); + soupcall_set(child_so, SO_RCV, c4iw_so_upcall, child_ep); + SOCKBUF_UNLOCK(&child_so->so_rcv); CTR5(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, child so %p, child ep %p", __func__, parent_ep->com.so, parent_ep, child_so, child_ep); - child_ep->com.local_addr = parent_ep->com.local_addr; + in_getsockaddr(child_so, (struct sockaddr **)&local); + in_getpeeraddr(child_so, (struct sockaddr **)&remote); + + child_ep->com.local_addr = *local; child_ep->com.remote_addr = *remote; child_ep->com.dev = parent_ep->com.dev; child_ep->com.so = child_so; @@ -689,15 +663,17 @@ child_ep->com.thread = parent_ep->com.thread; child_ep->parent_ep = parent_ep; + free(local, M_SONAME); free(remote, M_SONAME); + c4iw_get_ep(&parent_ep->com); - child_ep->parent_ep = parent_ep; init_timer(&child_ep->timer); state_set(&child_ep->com, MPA_REQ_WAIT); START_EP_TIMER(child_ep); /* maybe the request has already been queued up on the socket... */ process_mpa_request(child_ep); + return; } static int @@ -739,7 +715,10 @@ } if (state == LISTEN) { - process_newconn(ep); + /* socket listening events are handled at IWCM */ + CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__, + ep->com.state, ep); + BUG(); return; } @@ -920,7 +899,6 @@ ep = container_of(kref, struct c4iw_ep, com.kref); epc = &ep->com; - KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so)); KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list", __func__, epc)); kfree(ep); @@ -2127,10 +2105,10 @@ } /* - * iwcm->create_listen. Returns -errno on failure. + * iwcm->create_listen_ep. Returns -errno on failure. */ int -c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) +c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog) { int rc; struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); @@ -2155,17 +2133,6 @@ ep->com.thread = curthread; state_set(&ep->com, LISTEN); ep->com.so = so; - init_sock(&ep->com); - - rc = solisten(so, ep->backlog, ep->com.thread); - if (rc != 0) { - log(LOG_ERR, "%s: failed to start listener: %d\n", __func__, - rc); - close_socket(&ep->com, 0); - cm_id->rem_ref(cm_id); - c4iw_put_ep(&ep->com); - goto failed; - } cm_id->provider_data = ep; return (0); @@ -2175,21 +2142,19 @@ return (-rc); } -int -c4iw_destroy_listen(struct iw_cm_id *cm_id) +void +c4iw_destroy_listen_ep(struct iw_cm_id *cm_id) { - int rc; struct c4iw_listen_ep *ep = to_listen_ep(cm_id); - CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, inp %p", __func__, cm_id, - cm_id->so, cm_id->so->so_pcb); + CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, state %s", __func__, cm_id, + cm_id->so, states[ep->com.state]); state_set(&ep->com, DEAD); - rc = close_socket(&ep->com, 0); cm_id->rem_ref(cm_id); c4iw_put_ep(&ep->com); - return (rc); + return; } int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) Index: head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h =================================================================== --- head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h +++ head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h @@ -850,8 +850,8 @@ int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind); int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); -int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog); -int c4iw_destroy_listen(struct iw_cm_id *cm_id); +int c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog); +void c4iw_destroy_listen_ep(struct iw_cm_id *cm_id); int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); void c4iw_qp_add_ref(struct ib_qp *qp); @@ -914,6 +914,8 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid, struct c4iw_dev_ucontext *uctx); void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe); +void process_newconn(struct iw_cm_id *parent_cm_id, + struct socket *child_so); extern struct cxgb4_client t4c_client; extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; Index: head/sys/dev/cxgbe/iw_cxgbe/provider.c =================================================================== --- head/sys/dev/cxgbe/iw_cxgbe/provider.c +++ head/sys/dev/cxgbe/iw_cxgbe/provider.c @@ -474,8 +474,9 @@ iwcm->connect = c4iw_connect; iwcm->accept = c4iw_accept_cr; iwcm->reject = c4iw_reject_cr; - iwcm->create_listen = c4iw_create_listen; - iwcm->destroy_listen = c4iw_destroy_listen; + iwcm->create_listen_ep = c4iw_create_listen_ep; + iwcm->destroy_listen_ep = c4iw_destroy_listen_ep; + iwcm->newconn = process_newconn; iwcm->add_ref = c4iw_qp_add_ref; iwcm->rem_ref = c4iw_qp_rem_ref; iwcm->get_qp = c4iw_get_qp; Index: head/sys/ofed/drivers/infiniband/core/cma.c =================================================================== --- head/sys/ofed/drivers/infiniband/core/cma.c +++ head/sys/ofed/drivers/infiniband/core/cma.c @@ -407,6 +407,75 @@ return -EAGAIN; } +int +rdma_find_cmid_laddr(struct sockaddr_in *local_addr, unsigned short dev_type, + void **cm_id) +{ + int ret; + u8 port; + int found_dev = 0, found_cmid = 0; + struct rdma_id_private *id_priv; + struct rdma_id_private *dev_id_priv; + struct cma_device *cma_dev; + struct rdma_dev_addr dev_addr; + union ib_gid gid; + enum rdma_link_layer dev_ll = dev_type == ARPHRD_INFINIBAND ? + IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; + + memset(&dev_addr, 0, sizeof(dev_addr)); + + ret = rdma_translate_ip((struct sockaddr *)local_addr, + &dev_addr, NULL); + if (ret) + goto err; + + /* find rdma device based on MAC address/gid */ + mutex_lock(&lock); + + memcpy(&gid, dev_addr.src_dev_addr + + rdma_addr_gid_offset(&dev_addr), sizeof(gid)); + + list_for_each_entry(cma_dev, &dev_list, list) + for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) + if ((rdma_port_get_link_layer(cma_dev->device, port) == + dev_ll) && + (rdma_node_get_transport(cma_dev->device->node_type) == + RDMA_TRANSPORT_IWARP)) { + ret = find_gid_port(cma_dev->device, + &gid, port); + if (!ret) { + found_dev = 1; + goto out; + } else if (ret == 1) { + mutex_unlock(&lock); + goto err; + } + } +out: + mutex_unlock(&lock); + + if (!found_dev) + goto err; + + /* Traverse through the list of listening cm_id's to find the + * desired cm_id based on rdma device & port number. + */ + list_for_each_entry(id_priv, &listen_any_list, list) + list_for_each_entry(dev_id_priv, &id_priv->listen_list, + listen_list) + if (dev_id_priv->cma_dev == cma_dev) + if (dev_id_priv->cm_id.iw->local_addr.sin_port + == local_addr->sin_port) { + *cm_id = (void *)dev_id_priv->cm_id.iw; + found_cmid = 1; + } + return found_cmid ? 0 : -ENODEV; + +err: + return -ENODEV; +} +EXPORT_SYMBOL(rdma_find_cmid_laddr); + static int cma_acquire_dev(struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; @@ -780,6 +849,12 @@ { return cma_zero_addr(addr) || cma_loopback_addr(addr); } +int +rdma_cma_any_addr(struct sockaddr *addr) +{ + return cma_any_addr(addr); +} +EXPORT_SYMBOL(rdma_cma_any_addr); static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) { @@ -1707,6 +1782,7 @@ dev_id_priv = container_of(id, struct rdma_id_private, id); dev_id_priv->state = RDMA_CM_ADDR_BOUND; + dev_id_priv->sock = id_priv->sock; memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); Index: head/sys/ofed/drivers/infiniband/core/iwcm.c =================================================================== --- head/sys/ofed/drivers/infiniband/core/iwcm.c +++ head/sys/ofed/drivers/infiniband/core/iwcm.c @@ -47,7 +47,10 @@ #include #include #include +#include +#include +#include #include #include @@ -65,6 +68,85 @@ struct iw_cm_event event; struct list_head free_list; }; +struct iwcm_listen_work { + struct work_struct work; + struct iw_cm_id *cm_id; +}; + +static LIST_HEAD(listen_port_list); + +static DEFINE_MUTEX(listen_port_mutex); +static DEFINE_MUTEX(dequeue_mutex); + +struct listen_port_info { + struct list_head list; + uint16_t port_num; + uint32_t refcnt; +}; + +static int32_t +add_port_to_listenlist(uint16_t port) +{ + struct listen_port_info *port_info; + int err = 0; + + mutex_lock(&listen_port_mutex); + + list_for_each_entry(port_info, &listen_port_list, list) + if (port_info->port_num == port) + goto found_port; + + port_info = kmalloc(sizeof(*port_info), GFP_KERNEL); + if (!port_info) { + err = -ENOMEM; + mutex_unlock(&listen_port_mutex); + goto out; + } + + port_info->port_num = port; + port_info->refcnt = 0; + + list_add(&port_info->list, &listen_port_list); + +found_port: + ++(port_info->refcnt); + mutex_unlock(&listen_port_mutex); + return port_info->refcnt; +out: + return err; +} + +static int32_t +rem_port_from_listenlist(uint16_t port) +{ + struct listen_port_info *port_info; + int ret, found_port = 0; + + mutex_lock(&listen_port_mutex); + + list_for_each_entry(port_info, &listen_port_list, list) + if (port_info->port_num == port) { + found_port = 1; + break; + } + + if (found_port) { + --(port_info->refcnt); + ret = port_info->refcnt; + if (port_info->refcnt == 0) { + /* Remove this entry from the list as there are no + * more listeners for this port_num. + */ + list_del(&port_info->list); + kfree(port_info); + } + } else { + ret = -EINVAL; + } + mutex_unlock(&listen_port_mutex); + return ret; + +} /* * The following services provide a mechanism for pre-allocating iwcm_work @@ -320,6 +402,165 @@ } EXPORT_SYMBOL(iw_cm_disconnect); +static struct socket * +dequeue_socket(struct socket *head) +{ + struct socket *so; + struct sockaddr_in *remote; + + ACCEPT_LOCK(); + so = TAILQ_FIRST(&head->so_comp); + if (!so) { + ACCEPT_UNLOCK(); + return NULL; + } + + SOCK_LOCK(so); + /* + * Before changing the flags on the socket, we have to bump the + * reference count. Otherwise, if the protocol calls sofree(), + * the socket will be released due to a zero refcount. + */ + soref(so); + TAILQ_REMOVE(&head->so_comp, so, so_list); + head->so_qlen--; + so->so_qstate &= ~SQ_COMP; + so->so_head = NULL; + so->so_state |= SS_NBIO; + SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + soaccept(so, (struct sockaddr **)&remote); + + free(remote, M_SONAME); + return so; +} +static void +iw_so_event_handler(struct work_struct *_work) +{ + struct iwcm_listen_work *work = container_of(_work, + struct iwcm_listen_work, work); + struct iw_cm_id *listen_cm_id = work->cm_id; + struct iwcm_id_private *cm_id_priv; + struct iw_cm_id *real_cm_id; + struct sockaddr_in *local; + struct socket *so; + + cm_id_priv = container_of(listen_cm_id, struct iwcm_id_private, id); + + if (cm_id_priv->state != IW_CM_STATE_LISTEN) { + kfree(work); + return; + } + mutex_lock(&dequeue_mutex); + + /* Dequeue & process all new 'so' connection requests for this cmid */ + while ((so = dequeue_socket(work->cm_id->so)) != NULL) { + if (rdma_cma_any_addr((struct sockaddr *) + &listen_cm_id->local_addr)) { + in_getsockaddr(so, (struct sockaddr **)&local); + if (rdma_find_cmid_laddr(local, ARPHRD_ETHER, + (void **) &real_cm_id)) { + free(local, M_SONAME); + goto err; + } + free(local, M_SONAME); + + real_cm_id->device->iwcm->newconn(real_cm_id, so); + } else { + listen_cm_id->device->iwcm->newconn(listen_cm_id, so); + } + } +err: + mutex_unlock(&dequeue_mutex); + kfree(work); + return; +} +static int +iw_so_upcall(struct socket *parent_so, void *arg, int waitflag) +{ + struct iwcm_listen_work *work; + struct socket *so; + struct iw_cm_id *cm_id = arg; + + mutex_lock(&dequeue_mutex); + /* check whether iw_so_event_handler() already dequeued this 'so' */ + so = TAILQ_FIRST(&parent_so->so_comp); + if (!so) + return SU_OK; + work = kzalloc(sizeof(*work), M_NOWAIT); + if (!work) + return -ENOMEM; + work->cm_id = cm_id; + + INIT_WORK(&work->work, iw_so_event_handler); + queue_work(iwcm_wq, &work->work); + + mutex_unlock(&dequeue_mutex); + return SU_OK; +} + +static void +iw_init_sock(struct iw_cm_id *cm_id) +{ + struct sockopt sopt; + struct socket *so = cm_id->so; + int on = 1; + + SOCK_LOCK(so); + soupcall_set(so, SO_RCV, iw_so_upcall, cm_id); + so->so_state |= SS_NBIO; + SOCK_UNLOCK(so); + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_NODELAY; + sopt.sopt_val = (caddr_t)&on; + sopt.sopt_valsize = sizeof(on); + sopt.sopt_td = NULL; + sosetopt(so, &sopt); +} + +static int +iw_close_socket(struct iw_cm_id *cm_id, int close) +{ + struct socket *so = cm_id->so; + int rc; + + + SOCK_LOCK(so); + soupcall_clear(so, SO_RCV); + SOCK_UNLOCK(so); + + if (close) + rc = soclose(so); + else + rc = soshutdown(so, SHUT_WR | SHUT_RD); + + cm_id->so = NULL; + + return rc; +} + +static int +iw_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + int rc; + + iw_init_sock(cm_id); + rc = solisten(cm_id->so, backlog, curthread); + if (rc != 0) + iw_close_socket(cm_id, 0); + return rc; +} + +static int +iw_destroy_listen(struct iw_cm_id *cm_id) +{ + int rc; + rc = iw_close_socket(cm_id, 0); + return rc; +} + + /* * CM_ID <-- DESTROYING * @@ -330,7 +571,7 @@ { struct iwcm_id_private *cm_id_priv; unsigned long flags; - int ret; + int ret = 0, refcnt; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* @@ -345,8 +586,18 @@ case IW_CM_STATE_LISTEN: cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - /* destroy the listening endpoint */ - ret = cm_id->device->iwcm->destroy_listen(cm_id); + if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) { + refcnt = + rem_port_from_listenlist(cm_id->local_addr.sin_port); + + if (refcnt == 0) + ret = iw_destroy_listen(cm_id); + + cm_id->device->iwcm->destroy_listen_ep(cm_id); + } else { + ret = iw_destroy_listen(cm_id); + cm_id->device->iwcm->destroy_listen_ep(cm_id); + } spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_ESTABLISHED: @@ -418,7 +669,7 @@ { struct iwcm_id_private *cm_id_priv; unsigned long flags; - int ret; + int ret, refcnt; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); @@ -431,9 +682,33 @@ case IW_CM_STATE_IDLE: cm_id_priv->state = IW_CM_STATE_LISTEN; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id->device->iwcm->create_listen(cm_id, backlog); - if (ret) + + if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) { + refcnt = + add_port_to_listenlist(cm_id->local_addr.sin_port); + + if (refcnt == 1) { + ret = iw_create_listen(cm_id, backlog); + } else if (refcnt <= 0) { + ret = -EINVAL; + } else { + /* if refcnt > 1, a socket listener created + * already. And we need not create socket + * listener on other rdma devices/listen cm_id's + * due to TOE. That is when a socket listener is + * created with INADDR_ANY all registered TOE + * devices will get a call to start + * hardware listeners. + */ + } + } else { + ret = iw_create_listen(cm_id, backlog); + } + if (!ret) + cm_id->device->iwcm->create_listen_ep(cm_id, backlog); + else cm_id_priv->state = IW_CM_STATE_IDLE; + spin_lock_irqsave(&cm_id_priv->lock, flags); break; default: Index: head/sys/ofed/include/rdma/iw_cm.h =================================================================== --- head/sys/ofed/include/rdma/iw_cm.h +++ head/sys/ofed/include/rdma/iw_cm.h @@ -120,10 +120,13 @@ int (*reject)(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); - int (*create_listen)(struct iw_cm_id *cm_id, + int (*create_listen_ep)(struct iw_cm_id *cm_id, int backlog); - int (*destroy_listen)(struct iw_cm_id *cm_id); + void (*destroy_listen_ep)(struct iw_cm_id *cm_id); + + void (*newconn)(struct iw_cm_id *parent_cm_id, + struct socket *so); }; /** Index: head/sys/ofed/include/rdma/rdma_cm.h =================================================================== --- head/sys/ofed/include/rdma/rdma_cm.h +++ head/sys/ofed/include/rdma/rdma_cm.h @@ -400,5 +400,7 @@ * @timeout: QP timeout */ void rdma_set_timeout(struct rdma_cm_id *id, int timeout); - +int rdma_cma_any_addr(struct sockaddr *addr); +int rdma_find_cmid_laddr(struct sockaddr_in *local_addr, + unsigned short dev_type, void **cm_id); #endif /* RDMA_CM_H */