Index: head/sys/ofed/drivers/infiniband/core/ib_cm.c =================================================================== --- head/sys/ofed/drivers/infiniband/core/ib_cm.c (revision 338540) +++ head/sys/ofed/drivers/infiniband/core/ib_cm.c (revision 338541) @@ -1,4147 +1,4150 @@ /*- * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 * * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cm_msgs.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); static void cm_add_one(struct ib_device *device); static void cm_remove_one(struct ib_device *device, void *client_data); static struct ib_client cm_client = { .name = "cm", .add = cm_add_one, .remove = cm_remove_one }; static struct ib_cm { spinlock_t lock; struct list_head device_list; rwlock_t device_lock; struct rb_root listen_service_table; u64 listen_service_id; /* struct rb_root peer_service_table; todo: fix peer to peer */ struct rb_root remote_qp_table; struct rb_root remote_id_table; struct rb_root remote_sidr_table; struct idr local_id_table; __be32 random_id_operand; struct list_head timewait_list; struct workqueue_struct *wq; /* Sync on cm change port state */ spinlock_t state_lock; } cm; /* Counter indexes ordered by attribute ID */ enum { CM_REQ_COUNTER, CM_MRA_COUNTER, CM_REJ_COUNTER, CM_REP_COUNTER, CM_RTU_COUNTER, CM_DREQ_COUNTER, CM_DREP_COUNTER, CM_SIDR_REQ_COUNTER, CM_SIDR_REP_COUNTER, CM_LAP_COUNTER, CM_APR_COUNTER, CM_ATTR_COUNT, CM_ATTR_ID_OFFSET = 0x0010, }; enum { CM_XMIT, CM_XMIT_RETRIES, CM_RECV, CM_RECV_DUPLICATES, CM_COUNTER_GROUPS }; static char const counter_group_names[CM_COUNTER_GROUPS] [sizeof("cm_rx_duplicates")] = { "cm_tx_msgs", "cm_tx_retries", "cm_rx_msgs", "cm_rx_duplicates" }; struct cm_counter_group { struct kobject obj; atomic_long_t counter[CM_ATTR_COUNT]; }; struct cm_counter_attribute { struct attribute attr; int index; }; #define CM_COUNTER_ATTR(_name, _index) \ struct cm_counter_attribute cm_##_name##_counter_attr = { \ .attr = { .name = __stringify(_name), .mode = 0444 }, \ .index = _index \ } static CM_COUNTER_ATTR(req, CM_REQ_COUNTER); static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER); static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER); static CM_COUNTER_ATTR(rep, CM_REP_COUNTER); static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER); static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER); static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER); static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER); static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER); static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER); static CM_COUNTER_ATTR(apr, CM_APR_COUNTER); static struct attribute *cm_counter_default_attrs[] = { &cm_req_counter_attr.attr, &cm_mra_counter_attr.attr, &cm_rej_counter_attr.attr, &cm_rep_counter_attr.attr, &cm_rtu_counter_attr.attr, &cm_dreq_counter_attr.attr, &cm_drep_counter_attr.attr, &cm_sidr_req_counter_attr.attr, &cm_sidr_rep_counter_attr.attr, &cm_lap_counter_attr.attr, &cm_apr_counter_attr.attr, NULL }; struct cm_port { struct cm_device *cm_dev; struct ib_mad_agent *mad_agent; struct kobject port_obj; u8 port_num; struct list_head cm_priv_prim_list; struct list_head cm_priv_altr_list; struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; }; struct cm_device { struct list_head list; struct ib_device *ib_device; struct device *device; u8 ack_delay; int going_down; struct cm_port *port[0]; }; struct cm_av { struct cm_port *port; union ib_gid dgid; struct ib_ah_attr ah_attr; u16 pkey_index; u8 timeout; }; struct cm_work { struct delayed_work work; struct list_head list; struct cm_port *port; struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */ __be32 local_id; /* Established / timewait */ __be32 remote_id; struct ib_cm_event cm_event; struct ib_sa_path_rec path[0]; }; struct cm_timewait_info { struct cm_work work; /* Must be first. */ struct list_head list; struct rb_node remote_qp_node; struct rb_node remote_id_node; __be64 remote_ca_guid; __be32 remote_qpn; u8 inserted_remote_qp; u8 inserted_remote_id; }; struct cm_id_private { struct ib_cm_id id; struct rb_node service_node; struct rb_node sidr_id_node; spinlock_t lock; /* Do not acquire inside cm.lock */ struct completion comp; atomic_t refcount; /* Number of clients sharing this ib_cm_id. Only valid for listeners. * Protected by the cm.lock spinlock. */ int listen_sharecount; struct ib_mad_send_buf *msg; struct cm_timewait_info *timewait_info; /* todo: use alternate port on send failure */ struct cm_av av; struct cm_av alt_av; void *private_data; __be64 tid; __be32 local_qpn; __be32 remote_qpn; enum ib_qp_type qp_type; __be32 sq_psn; __be32 rq_psn; int timeout_ms; enum ib_mtu path_mtu; __be16 pkey; u8 private_data_len; u8 max_cm_retries; u8 peer_to_peer; u8 responder_resources; u8 initiator_depth; u8 retry_count; u8 rnr_retry_count; u8 service_timeout; u8 target_ack_delay; struct list_head prim_list; struct list_head altr_list; /* Indicates that the send port mad is registered and av is set */ int prim_send_port_not_ready; int altr_send_port_not_ready; struct list_head work_list; atomic_t work_count; }; static void cm_work_handler(struct work_struct *work); static inline void cm_deref_id(struct cm_id_private *cm_id_priv) { if (atomic_dec_and_test(&cm_id_priv->refcount)) complete(&cm_id_priv->comp); } static int cm_alloc_msg(struct cm_id_private *cm_id_priv, struct ib_mad_send_buf **msg) { struct ib_mad_agent *mad_agent; struct ib_mad_send_buf *m; struct ib_ah *ah; struct cm_av *av; unsigned long flags, flags2; int ret = 0; /* don't let the port to be released till the agent is down */ spin_lock_irqsave(&cm.state_lock, flags2); spin_lock_irqsave(&cm.lock, flags); if (!cm_id_priv->prim_send_port_not_ready) av = &cm_id_priv->av; else if (!cm_id_priv->altr_send_port_not_ready && (cm_id_priv->alt_av.port)) av = &cm_id_priv->alt_av; else { pr_info("%s: not valid CM id\n", __func__); ret = -ENODEV; spin_unlock_irqrestore(&cm.lock, flags); goto out; } spin_unlock_irqrestore(&cm.lock, flags); /* Make sure the port haven't released the mad yet */ mad_agent = cm_id_priv->av.port->mad_agent; if (!mad_agent) { pr_info("%s: not a valid MAD agent\n", __func__); ret = -ENODEV; goto out; } ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto out; } m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, av->pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); ret = PTR_ERR(m); goto out; } /* Timeout set by caller if response is expected. */ m->ah = ah; m->retries = cm_id_priv->max_cm_retries; atomic_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; *msg = m; out: spin_unlock_irqrestore(&cm.state_lock, flags2); return ret; } static int cm_alloc_response_msg(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc, struct ib_mad_send_buf **msg) { struct ib_mad_send_buf *m; struct ib_ah *ah; ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, port->port_num); if (IS_ERR(ah)) return PTR_ERR(ah); m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); } m->ah = ah; *msg = m; return 0; } static void cm_free_msg(struct ib_mad_send_buf *msg) { ib_destroy_ah(msg->ah); if (msg->context[0]) cm_deref_id(msg->context[0]); ib_free_send_mad(msg); } static void * cm_copy_private_data(const void *private_data, u8 private_data_len) { void *data; if (!private_data || !private_data_len) return NULL; data = kmemdup(private_data, private_data_len, GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); return data; } static void cm_set_private_data(struct cm_id_private *cm_id_priv, void *private_data, u8 private_data_len) { if (cm_id_priv->private_data && cm_id_priv->private_data_len) kfree(cm_id_priv->private_data); cm_id_priv->private_data = private_data; cm_id_priv->private_data_len = private_data_len; } static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, struct ib_grh *grh, struct cm_av *av) { av->port = port; av->pkey_index = wc->pkey_index; ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc, grh, &av->ah_attr); } static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av, struct cm_id_private *cm_id_priv) { struct cm_device *cm_dev; struct cm_port *port = NULL; unsigned long flags; int ret; u8 p; struct net_device *ndev = ib_get_ndev_from_path(path); read_lock_irqsave(&cm.device_lock, flags); list_for_each_entry(cm_dev, &cm.device_list, list) { if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, path->gid_type, ndev, &p, NULL)) { port = cm_dev->port[p-1]; break; } } read_unlock_irqrestore(&cm.device_lock, flags); if (ndev) dev_put(ndev); if (!port) return -EINVAL; ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num, be16_to_cpu(path->pkey), &av->pkey_index); if (ret) return ret; av->port = port; ret = ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path, &av->ah_attr); if (ret) return ret; av->timeout = path->packet_life_time + 1; spin_lock_irqsave(&cm.lock, flags); if (&cm_id_priv->av == av) list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); else if (&cm_id_priv->alt_av == av) list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); else ret = -EINVAL; spin_unlock_irqrestore(&cm.lock, flags); return ret; } static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; int id; idr_preload(GFP_KERNEL); spin_lock_irqsave(&cm.lock, flags); id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT); spin_unlock_irqrestore(&cm.lock, flags); idr_preload_end(); cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; return id < 0 ? id : 0; } static void cm_free_id(__be32 local_id) { spin_lock_irq(&cm.lock); idr_remove(&cm.local_id_table, (__force int) (local_id ^ cm.random_id_operand)); spin_unlock_irq(&cm.lock); } static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; cm_id_priv = idr_find(&cm.local_id_table, (__force int) (local_id ^ cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); else cm_id_priv = NULL; } return cm_id_priv; } static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; spin_lock_irq(&cm.lock); cm_id_priv = cm_get_id(local_id, remote_id); spin_unlock_irq(&cm.lock); return cm_id_priv; } /* * Trivial helpers to strip endian annotation and compare; the * endianness doesn't actually matter since we just need a stable * order for the RB tree. */ static int be32_lt(__be32 a, __be32 b) { return (__force u32) a < (__force u32) b; } static int be32_gt(__be32 a, __be32 b) { return (__force u32) a > (__force u32) b; } static int be64_lt(__be64 a, __be64 b) { return (__force u64) a < (__force u64) b; } static int be64_gt(__be64 a, __be64 b) { return (__force u64) a > (__force u64) b; } static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) { struct rb_node **link = &cm.listen_service_table.rb_node; struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; __be64 service_id = cm_id_priv->id.service_id; __be64 service_mask = cm_id_priv->id.service_mask; while (*link) { parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); if ((cur_cm_id_priv->id.service_mask & service_id) == (service_mask & cur_cm_id_priv->id.service_id) && (cm_id_priv->id.device == cur_cm_id_priv->id.device)) return cur_cm_id_priv; if (cm_id_priv->id.device < cur_cm_id_priv->id.device) link = &(*link)->rb_left; else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) link = &(*link)->rb_right; else if (be64_lt(service_id, cur_cm_id_priv->id.service_id)) link = &(*link)->rb_left; else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) link = &(*link)->rb_right; else link = &(*link)->rb_right; } rb_link_node(&cm_id_priv->service_node, parent, link); rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table); return NULL; } static struct cm_id_private * cm_find_listen(struct ib_device *device, __be64 service_id) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); if ((cm_id_priv->id.service_mask & service_id) == cm_id_priv->id.service_id && (cm_id_priv->id.device == device)) return cm_id_priv; if (device < cm_id_priv->id.device) node = node->rb_left; else if (device > cm_id_priv->id.device) node = node->rb_right; else if (be64_lt(service_id, cm_id_priv->id.service_id)) node = node->rb_left; else if (be64_gt(service_id, cm_id_priv->id.service_id)) node = node->rb_right; else node = node->rb_right; } return NULL; } static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info *timewait_info) { struct rb_node **link = &cm.remote_id_table.rb_node; struct rb_node *parent = NULL; struct cm_timewait_info *cur_timewait_info; __be64 remote_ca_guid = timewait_info->remote_ca_guid; __be32 remote_id = timewait_info->work.remote_id; while (*link) { parent = *link; cur_timewait_info = rb_entry(parent, struct cm_timewait_info, remote_id_node); if (be32_lt(remote_id, cur_timewait_info->work.remote_id)) link = &(*link)->rb_left; else if (be32_gt(remote_id, cur_timewait_info->work.remote_id)) link = &(*link)->rb_right; else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_left; else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_right; else return cur_timewait_info; } timewait_info->inserted_remote_id = 1; rb_link_node(&timewait_info->remote_id_node, parent, link); rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table); return NULL; } static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid, __be32 remote_id) { struct rb_node *node = cm.remote_id_table.rb_node; struct cm_timewait_info *timewait_info; while (node) { timewait_info = rb_entry(node, struct cm_timewait_info, remote_id_node); if (be32_lt(remote_id, timewait_info->work.remote_id)) node = node->rb_left; else if (be32_gt(remote_id, timewait_info->work.remote_id)) node = node->rb_right; else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid)) node = node->rb_left; else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid)) node = node->rb_right; else return timewait_info; } return NULL; } static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info *timewait_info) { struct rb_node **link = &cm.remote_qp_table.rb_node; struct rb_node *parent = NULL; struct cm_timewait_info *cur_timewait_info; __be64 remote_ca_guid = timewait_info->remote_ca_guid; __be32 remote_qpn = timewait_info->remote_qpn; while (*link) { parent = *link; cur_timewait_info = rb_entry(parent, struct cm_timewait_info, remote_qp_node); if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn)) link = &(*link)->rb_left; else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn)) link = &(*link)->rb_right; else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_left; else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_right; else return cur_timewait_info; } timewait_info->inserted_remote_qp = 1; rb_link_node(&timewait_info->remote_qp_node, parent, link); rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table); return NULL; } static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private *cm_id_priv) { struct rb_node **link = &cm.remote_sidr_table.rb_node; struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; union ib_gid *port_gid = &cm_id_priv->av.dgid; __be32 remote_id = cm_id_priv->id.remote_id; while (*link) { parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, sidr_id_node); if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id)) link = &(*link)->rb_left; else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id)) link = &(*link)->rb_right; else { int cmp; cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid, sizeof *port_gid); if (cmp < 0) link = &(*link)->rb_left; else if (cmp > 0) link = &(*link)->rb_right; else return cur_cm_id_priv; } } rb_link_node(&cm_id_priv->sidr_id_node, parent, link); rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); return NULL; } static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, enum ib_cm_sidr_status status) { struct ib_cm_sidr_rep_param param; memset(¶m, 0, sizeof param); param.status = status; ib_send_cm_sidr_rep(&cm_id_priv->id, ¶m); } struct ib_cm_id *ib_create_cm_id(struct ib_device *device, ib_cm_handler cm_handler, void *context) { struct cm_id_private *cm_id_priv; int ret; cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL); if (!cm_id_priv) return ERR_PTR(-ENOMEM); cm_id_priv->id.state = IB_CM_IDLE; cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.remote_cm_qpn = 1; ret = cm_alloc_id(cm_id_priv); if (ret) goto error; spin_lock_init(&cm_id_priv->lock); init_completion(&cm_id_priv->comp); INIT_LIST_HEAD(&cm_id_priv->work_list); INIT_LIST_HEAD(&cm_id_priv->prim_list); INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); atomic_set(&cm_id_priv->refcount, 1); return &cm_id_priv->id; error: kfree(cm_id_priv); return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(ib_create_cm_id); static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv) { struct cm_work *work; if (list_empty(&cm_id_priv->work_list)) return NULL; work = list_entry(cm_id_priv->work_list.next, struct cm_work, list); list_del(&work->list); return work; } static void cm_free_work(struct cm_work *work) { if (work->mad_recv_wc) ib_free_recv_mad(work->mad_recv_wc); kfree(work); } static inline int cm_convert_to_ms(int iba_time) { /* approximate conversion to ms from 4.096us x 2^iba_time */ return 1 << max(iba_time - 8, 0); } /* * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time * Because of how ack_timeout is stored, adding one doubles the timeout. * To avoid large timeouts, select the max(ack_delay, life_time + 1), and * increment it (round up) only if the other is within 50%. */ static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time) { int ack_timeout = packet_life_time + 1; if (ack_timeout >= ca_ack_delay) ack_timeout += (ca_ack_delay >= (ack_timeout - 1)); else ack_timeout = ca_ack_delay + (ack_timeout >= (ca_ack_delay - 1)); return min(31, ack_timeout); } static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) { if (timewait_info->inserted_remote_id) { rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); timewait_info->inserted_remote_id = 0; } if (timewait_info->inserted_remote_qp) { rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); timewait_info->inserted_remote_qp = 0; } } static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) { struct cm_timewait_info *timewait_info; timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL); if (!timewait_info) return ERR_PTR(-ENOMEM); timewait_info->work.local_id = local_id; INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler); timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT; return timewait_info; } static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; unsigned long flags; struct cm_device *cm_dev; cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client); if (!cm_dev) return; spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list); spin_unlock_irqrestore(&cm.lock, flags); /* * The cm_id could be destroyed by the user before we exit timewait. * To protect against this, we search for the cm_id after exiting * timewait before notifying the user that we've exited timewait. */ cm_id_priv->id.state = IB_CM_TIMEWAIT; wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); /* Check if the device started its remove_one */ spin_lock_irqsave(&cm.lock, flags); if (!cm_dev->going_down) queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, msecs_to_jiffies(wait_time)); spin_unlock_irqrestore(&cm.lock, flags); cm_id_priv->timewait_info = NULL; } static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) { unsigned long flags; cm_id_priv->id.state = IB_CM_IDLE; if (cm_id_priv->timewait_info) { spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irqrestore(&cm.lock, flags); kfree(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; } } static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; struct cm_work *work; cm_id_priv = container_of(cm_id, struct cm_id_private, id); retest: spin_lock_irq(&cm_id_priv->lock); switch (cm_id->state) { case IB_CM_LISTEN: spin_unlock_irq(&cm_id_priv->lock); spin_lock_irq(&cm.lock); if (--cm_id_priv->listen_sharecount > 0) { /* The id is still shared. */ cm_deref_id(cm_id_priv); spin_unlock_irq(&cm.lock); return; } rb_erase(&cm_id_priv->service_node, &cm.listen_service_table); spin_unlock_irq(&cm.lock); break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); break; case IB_CM_SIDR_REQ_RCVD: spin_unlock_irq(&cm_id_priv->lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); spin_lock_irq(&cm.lock); if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); spin_unlock_irq(&cm.lock); break; case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, &cm_id_priv->id.device->node_guid, sizeof cm_id_priv->id.device->node_guid, NULL, 0); break; case IB_CM_REQ_RCVD: if (err == -ENOMEM) { /* Do not reject to allow future retries. */ cm_reset_to_idle(cm_id_priv); spin_unlock_irq(&cm_id_priv->lock); } else { spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); } break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* Fall through */ case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); break; case IB_CM_ESTABLISHED: spin_unlock_irq(&cm_id_priv->lock); if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) break; ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_enter_timewait(cm_id_priv); spin_unlock_irq(&cm_id_priv->lock); break; case IB_CM_DREQ_RCVD: spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_drep(cm_id, NULL, 0); break; default: spin_unlock_irq(&cm_id_priv->lock); break; } spin_lock_irq(&cm.lock); if (!list_empty(&cm_id_priv->altr_list) && (!cm_id_priv->altr_send_port_not_ready)) list_del(&cm_id_priv->altr_list); if (!list_empty(&cm_id_priv->prim_list) && (!cm_id_priv->prim_send_port_not_ready)) list_del(&cm_id_priv->prim_list); spin_unlock_irq(&cm.lock); cm_free_id(cm_id->local_id); cm_deref_id(cm_id_priv); wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); kfree(cm_id_priv->private_data); kfree(cm_id_priv); } void ib_destroy_cm_id(struct ib_cm_id *cm_id) { cm_destroy_id(cm_id, 0); } EXPORT_SYMBOL(ib_destroy_cm_id); /** * __ib_cm_listen - Initiates listening on the specified service ID for * connection and service ID resolution requests. * @cm_id: Connection identifier associated with the listen request. * @service_id: Service identifier matched against incoming connection * and service ID resolution requests. The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. * @service_mask: Mask applied to service ID used to listen across a * range of service IDs. If set to 0, the service ID is matched * exactly. This parameter is ignored if %service_id is set to * IB_CM_ASSIGN_SERVICE_ID. */ static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) { struct cm_id_private *cm_id_priv, *cur_cm_id_priv; int ret = 0; service_mask = service_mask ? service_mask : ~cpu_to_be64(0); service_id &= service_mask; if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && (service_id != IB_CM_ASSIGN_SERVICE_ID)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); if (cm_id->state != IB_CM_IDLE) return -EINVAL; cm_id->state = IB_CM_LISTEN; ++cm_id_priv->listen_sharecount; if (service_id == IB_CM_ASSIGN_SERVICE_ID) { cm_id->service_id = cpu_to_be64(cm.listen_service_id++); cm_id->service_mask = ~cpu_to_be64(0); } else { cm_id->service_id = service_id; cm_id->service_mask = service_mask; } cur_cm_id_priv = cm_insert_listen(cm_id_priv); if (cur_cm_id_priv) { cm_id->state = IB_CM_IDLE; --cm_id_priv->listen_sharecount; ret = -EBUSY; } return ret; } int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm.lock, flags); ret = __ib_cm_listen(cm_id, service_id, service_mask); spin_unlock_irqrestore(&cm.lock, flags); return ret; } EXPORT_SYMBOL(ib_cm_listen); /** * Create a new listening ib_cm_id and listen on the given service ID. * * If there's an existing ID listening on that same device and service ID, * return it. * * @device: Device associated with the cm_id. All related communication will * be associated with the specified device. * @cm_handler: Callback invoked to notify the user of CM events. * @service_id: Service identifier matched against incoming connection * and service ID resolution requests. The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. * * Callers should call ib_destroy_cm_id when done with the listener ID. */ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, ib_cm_handler cm_handler, __be64 service_id) { struct cm_id_private *cm_id_priv; struct ib_cm_id *cm_id; unsigned long flags; int err = 0; /* Create an ID in advance, since the creation may sleep */ cm_id = ib_create_cm_id(device, cm_handler, NULL); if (IS_ERR(cm_id)) return cm_id; spin_lock_irqsave(&cm.lock, flags); if (service_id == IB_CM_ASSIGN_SERVICE_ID) goto new_id; /* Find an existing ID */ cm_id_priv = cm_find_listen(device, service_id); if (cm_id_priv) { if (cm_id->cm_handler != cm_handler || cm_id->context) { /* Sharing an ib_cm_id with different handlers is not * supported */ spin_unlock_irqrestore(&cm.lock, flags); return ERR_PTR(-EINVAL); } atomic_inc(&cm_id_priv->refcount); ++cm_id_priv->listen_sharecount; spin_unlock_irqrestore(&cm.lock, flags); ib_destroy_cm_id(cm_id); cm_id = &cm_id_priv->id; return cm_id; } new_id: /* Use newly created ID */ err = __ib_cm_listen(cm_id, service_id, 0); spin_unlock_irqrestore(&cm.lock, flags); if (err) { ib_destroy_cm_id(cm_id); return ERR_PTR(err); } return cm_id; } EXPORT_SYMBOL(ib_cm_insert_listen); static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, enum cm_msg_sequence msg_seq) { u64 hi_tid, low_tid; hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; low_tid = (u64) ((__force u32)cm_id_priv->id.local_id | (msg_seq << 30)); return cpu_to_be64(hi_tid | low_tid); } static void cm_format_mad_hdr(struct ib_mad_hdr *hdr, __be16 attr_id, __be64 tid) { hdr->base_version = IB_MGMT_BASE_VERSION; hdr->mgmt_class = IB_MGMT_CLASS_CM; hdr->class_version = IB_CM_CLASS_VERSION; hdr->method = IB_MGMT_METHOD_SEND; hdr->attr_id = attr_id; hdr->tid = tid; } static void cm_format_req(struct cm_req_msg *req_msg, struct cm_id_private *cm_id_priv, struct ib_cm_req_param *param) { struct ib_sa_path_rec *pri_path = param->primary_path; struct ib_sa_path_rec *alt_path = param->alternate_path; cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); req_msg->local_comm_id = cm_id_priv->id.local_id; req_msg->service_id = param->service_id; req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); cm_req_set_init_depth(req_msg, param->initiator_depth); cm_req_set_remote_resp_timeout(req_msg, param->remote_cm_response_timeout); cm_req_set_qp_type(req_msg, param->qp_type); cm_req_set_flow_ctrl(req_msg, param->flow_control); cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); cm_req_set_local_resp_timeout(req_msg, param->local_cm_response_timeout); req_msg->pkey = param->primary_path->pkey; cm_req_set_path_mtu(req_msg, param->primary_path->mtu); cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); if (param->qp_type != IB_QPT_XRC_INI) { cm_req_set_resp_res(req_msg, param->responder_resources); cm_req_set_retry_count(req_msg, param->retry_count); cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); cm_req_set_srq(req_msg, param->srq); } if (pri_path->hop_limit <= 1) { req_msg->primary_local_lid = pri_path->slid; req_msg->primary_remote_lid = pri_path->dlid; } else { /* Work-around until there's a way to obtain remote LID info */ req_msg->primary_local_lid = IB_LID_PERMISSIVE; req_msg->primary_remote_lid = IB_LID_PERMISSIVE; } req_msg->primary_local_gid = pri_path->sgid; req_msg->primary_remote_gid = pri_path->dgid; cm_req_set_primary_flow_label(req_msg, pri_path->flow_label); cm_req_set_primary_packet_rate(req_msg, pri_path->rate); req_msg->primary_traffic_class = pri_path->traffic_class; req_msg->primary_hop_limit = pri_path->hop_limit; cm_req_set_primary_sl(req_msg, pri_path->sl); cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1)); cm_req_set_primary_local_ack_timeout(req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, pri_path->packet_life_time)); if (alt_path) { if (alt_path->hop_limit <= 1) { req_msg->alt_local_lid = alt_path->slid; req_msg->alt_remote_lid = alt_path->dlid; } else { req_msg->alt_local_lid = IB_LID_PERMISSIVE; req_msg->alt_remote_lid = IB_LID_PERMISSIVE; } req_msg->alt_local_gid = alt_path->sgid; req_msg->alt_remote_gid = alt_path->dgid; cm_req_set_alt_flow_label(req_msg, alt_path->flow_label); cm_req_set_alt_packet_rate(req_msg, alt_path->rate); req_msg->alt_traffic_class = alt_path->traffic_class; req_msg->alt_hop_limit = alt_path->hop_limit; cm_req_set_alt_sl(req_msg, alt_path->sl); cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1)); cm_req_set_alt_local_ack_timeout(req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, alt_path->packet_life_time)); } if (param->private_data && param->private_data_len) memcpy(req_msg->private_data, param->private_data, param->private_data_len); } static int cm_validate_req_param(struct ib_cm_req_param *param) { /* peer-to-peer not supported */ if (param->peer_to_peer) return -EINVAL; if (!param->primary_path) return -EINVAL; if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC && param->qp_type != IB_QPT_XRC_INI) return -EINVAL; if (param->private_data && param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE) return -EINVAL; if (param->alternate_path && (param->alternate_path->pkey != param->primary_path->pkey || param->alternate_path->mtu != param->primary_path->mtu)) return -EINVAL; return 0; } int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param) { struct cm_id_private *cm_id_priv; struct cm_req_msg *req_msg; unsigned long flags; int ret; ret = cm_validate_req_param(param); if (ret) return ret; /* Verify that we're not in timewait. */ cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_IDLE) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = -EINVAL; goto out; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> id.local_id); if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); goto out; } ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, cm_id_priv); if (ret) goto error1; if (param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, cm_id_priv); if (ret) goto error1; } cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = cm_convert_to_ms( param->primary_path->packet_life_time) * 2 + cm_convert_to_ms( param->remote_cm_response_timeout); cm_id_priv->max_cm_retries = param->max_cm_retries; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->retry_count = param->retry_count; cm_id_priv->path_mtu = param->primary_path->mtu; cm_id_priv->pkey = param->primary_path->pkey; cm_id_priv->qp_type = param->qp_type; ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); if (ret) goto error1; req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; cm_format_req(req_msg, cm_id_priv, param); cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg); spin_lock_irqsave(&cm_id_priv->lock, flags); ret = ib_post_send_mad(cm_id_priv->msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); goto error2; } BUG_ON(cm_id->state != IB_CM_IDLE); cm_id->state = IB_CM_REQ_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; error2: cm_free_msg(cm_id_priv->msg); error1: kfree(cm_id_priv->timewait_info); out: return ret; } EXPORT_SYMBOL(ib_send_cm_req); static int cm_issue_rej(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc, enum ib_cm_rej_reason reason, enum cm_msg_response msg_rejected, void *ari, u8 ari_length) { struct ib_mad_send_buf *msg = NULL; struct cm_rej_msg *rej_msg, *rcv_msg; int ret; ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); if (ret) return ret; /* We just need common CM header information. Cast to any message. */ rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad; rej_msg = (struct cm_rej_msg *) msg->mad; cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid); rej_msg->remote_comm_id = rcv_msg->local_comm_id; rej_msg->local_comm_id = rcv_msg->remote_comm_id; cm_rej_set_msg_rejected(rej_msg, msg_rejected); rej_msg->reason = cpu_to_be16(reason); if (ari && ari_length) { cm_rej_set_reject_info_len(rej_msg, ari_length); memcpy(rej_msg->ari, ari, ari_length); } ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); return ret; } static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct ib_sa_path_rec *primary_path, struct ib_sa_path_rec *alt_path) { memset(primary_path, 0, sizeof *primary_path); primary_path->dgid = req_msg->primary_local_gid; primary_path->sgid = req_msg->primary_remote_gid; primary_path->dlid = req_msg->primary_local_lid; primary_path->slid = req_msg->primary_remote_lid; primary_path->flow_label = cm_req_get_primary_flow_label(req_msg); primary_path->hop_limit = req_msg->primary_hop_limit; primary_path->traffic_class = req_msg->primary_traffic_class; primary_path->reversible = 1; primary_path->pkey = req_msg->pkey; primary_path->sl = cm_req_get_primary_sl(req_msg); primary_path->mtu_selector = IB_SA_EQ; primary_path->mtu = cm_req_get_path_mtu(req_msg); primary_path->rate_selector = IB_SA_EQ; primary_path->rate = cm_req_get_primary_packet_rate(req_msg); primary_path->packet_life_time_selector = IB_SA_EQ; primary_path->packet_life_time = cm_req_get_primary_local_ack_timeout(req_msg); primary_path->packet_life_time -= (primary_path->packet_life_time > 0); primary_path->service_id = req_msg->service_id; if (req_msg->alt_local_lid) { memset(alt_path, 0, sizeof *alt_path); alt_path->dgid = req_msg->alt_local_gid; alt_path->sgid = req_msg->alt_remote_gid; alt_path->dlid = req_msg->alt_local_lid; alt_path->slid = req_msg->alt_remote_lid; alt_path->flow_label = cm_req_get_alt_flow_label(req_msg); alt_path->hop_limit = req_msg->alt_hop_limit; alt_path->traffic_class = req_msg->alt_traffic_class; alt_path->reversible = 1; alt_path->pkey = req_msg->pkey; alt_path->sl = cm_req_get_alt_sl(req_msg); alt_path->mtu_selector = IB_SA_EQ; alt_path->mtu = cm_req_get_path_mtu(req_msg); alt_path->rate_selector = IB_SA_EQ; alt_path->rate = cm_req_get_alt_packet_rate(req_msg); alt_path->packet_life_time_selector = IB_SA_EQ; alt_path->packet_life_time = cm_req_get_alt_local_ack_timeout(req_msg); alt_path->packet_life_time -= (alt_path->packet_life_time > 0); alt_path->service_id = req_msg->service_id; } } static u16 cm_get_bth_pkey(struct cm_work *work) { struct ib_device *ib_dev = work->port->cm_dev->ib_device; u8 port_num = work->port->port_num; u16 pkey_index = work->mad_recv_wc->wc->pkey_index; u16 pkey; int ret; ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey); if (ret) { dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n", port_num, pkey_index, ret); return 0; } return pkey; } static void cm_format_req_event(struct cm_work *work, struct cm_id_private *cm_id_priv, struct ib_cm_id *listen_id) { struct cm_req_msg *req_msg; struct ib_cm_req_event_param *param; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.req_rcvd; param->listen_id = listen_id; param->bth_pkey = cm_get_bth_pkey(work); param->port = cm_id_priv->av.port->port_num; param->primary_path = &work->path[0]; if (req_msg->alt_local_lid) param->alternate_path = &work->path[1]; else param->alternate_path = NULL; param->remote_ca_guid = req_msg->local_ca_guid; param->remote_qkey = be32_to_cpu(req_msg->local_qkey); param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg)); param->qp_type = cm_req_get_qp_type(req_msg); param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg)); param->responder_resources = cm_req_get_init_depth(req_msg); param->initiator_depth = cm_req_get_resp_res(req_msg); param->local_cm_response_timeout = cm_req_get_remote_resp_timeout(req_msg); param->flow_control = cm_req_get_flow_ctrl(req_msg); param->remote_cm_response_timeout = cm_req_get_local_resp_timeout(req_msg); param->retry_count = cm_req_get_retry_count(req_msg); param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); param->srq = cm_req_get_srq(req_msg); + param->ppath_sgid_index = cm_id_priv->av.ah_attr.grh.sgid_index; work->cm_event.private_data = &req_msg->private_data; } static void cm_process_work(struct cm_id_private *cm_id_priv, struct cm_work *work) { int ret; /* We will typically only have the current event to report. */ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); cm_free_work(work); while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) { spin_lock_irq(&cm_id_priv->lock); work = cm_dequeue_work(cm_id_priv); spin_unlock_irq(&cm_id_priv->lock); BUG_ON(!work); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); cm_free_work(work); } cm_deref_id(cm_id_priv); if (ret) cm_destroy_id(&cm_id_priv->id, ret); } static void cm_format_mra(struct cm_mra_msg *mra_msg, struct cm_id_private *cm_id_priv, enum cm_msg_response msg_mraed, u8 service_timeout, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid); cm_mra_set_msg_mraed(mra_msg, msg_mraed); mra_msg->local_comm_id = cm_id_priv->id.local_id; mra_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_mra_set_service_timeout(mra_msg, service_timeout); if (private_data && private_data_len) memcpy(mra_msg->private_data, private_data, private_data_len); } static void cm_format_rej(struct cm_rej_msg *rej_msg, struct cm_id_private *cm_id_priv, enum ib_cm_rej_reason reason, void *ari, u8 ari_length, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid); rej_msg->remote_comm_id = cm_id_priv->id.remote_id; switch(cm_id_priv->id.state) { case IB_CM_REQ_RCVD: rej_msg->local_comm_id = 0; cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); break; case IB_CM_MRA_REQ_SENT: rej_msg->local_comm_id = cm_id_priv->id.local_id; cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); break; case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: rej_msg->local_comm_id = cm_id_priv->id.local_id; cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP); break; default: rej_msg->local_comm_id = cm_id_priv->id.local_id; cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER); break; } rej_msg->reason = cpu_to_be16(reason); if (ari && ari_length) { cm_rej_set_reject_info_len(rej_msg, ari_length); memcpy(rej_msg->ari, ari, ari_length); } if (private_data && private_data_len) memcpy(rej_msg->private_data, private_data, private_data_len); } static void cm_dup_req_handler(struct cm_work *work, struct cm_id_private *cm_id_priv) { struct ib_mad_send_buf *msg = NULL; int ret; atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_REQ_COUNTER]); /* Quick state check to discard duplicate REQs. */ if (cm_id_priv->id.state == IB_CM_REQ_RCVD) return; ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); if (ret) return; spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_MRA_REQ_SENT: cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout, cm_id_priv->private_data, cm_id_priv->private_data_len); break; case IB_CM_TIMEWAIT: cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0); break; default: goto unlock; } spin_unlock_irq(&cm_id_priv->lock); ret = ib_post_send_mad(msg, NULL); if (ret) goto free; return; unlock: spin_unlock_irq(&cm_id_priv->lock); free: cm_free_msg(msg); } static struct cm_id_private * cm_match_req(struct cm_work *work, struct cm_id_private *cm_id_priv) { struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; struct cm_timewait_info *timewait_info; struct cm_req_msg *req_msg; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; /* Check for possible duplicate REQ. */ spin_lock_irq(&cm.lock); timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); if (timewait_info) { cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, timewait_info->work.remote_id); spin_unlock_irq(&cm.lock); if (cur_cm_id_priv) { cm_dup_req_handler(work, cur_cm_id_priv); cm_deref_id(cur_cm_id_priv); } return NULL; } /* Check for stale connections. */ timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); if (timewait_info) { cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, NULL, 0); return NULL; } /* Find matching listen request. */ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, req_msg->service_id); if (!listen_cm_id_priv) { cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, NULL, 0); goto out; } atomic_inc(&listen_cm_id_priv->refcount); atomic_inc(&cm_id_priv->refcount); cm_id_priv->id.state = IB_CM_REQ_RCVD; atomic_inc(&cm_id_priv->work_count); spin_unlock_irq(&cm.lock); out: return listen_cm_id_priv; } /* * Work-around for inter-subnet connections. If the LIDs are permissive, * we need to override the LID/SL data in the REQ with the LID information * in the work completion. */ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) { if (!cm_req_get_primary_subnet_local(req_msg)) { if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { req_msg->primary_local_lid = cpu_to_be16(wc->slid); cm_req_set_primary_sl(req_msg, wc->sl); } if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE) req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits); } if (!cm_req_get_alt_subnet_local(req_msg)) { if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { req_msg->alt_local_lid = cpu_to_be16(wc->slid); cm_req_set_alt_sl(req_msg, wc->sl); } if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE) req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits); } } static int cm_req_handler(struct cm_work *work) { struct ib_cm_id *cm_id; struct cm_id_private *cm_id_priv, *listen_cm_id_priv; struct cm_req_msg *req_msg; union ib_gid gid; struct ib_gid_attr gid_attr; int ret; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); cm_id_priv->id.remote_id = req_msg->local_comm_id; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> id.local_id); if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); goto destroy; } cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg); listen_cm_id_priv = cm_match_req(work, cm_id_priv); if (!listen_cm_id_priv) { ret = -EINVAL; kfree(cm_id_priv->timewait_info); goto destroy; } cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; cm_id_priv->id.context = listen_cm_id_priv->id.context; cm_id_priv->id.service_id = req_msg->service_id; cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_process_routed_req(req_msg, work->mad_recv_wc->wc); cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit; ret = ib_get_cached_gid(work->port->cm_dev->ib_device, work->port->port_num, cm_id_priv->av.ah_attr.grh.sgid_index, &gid, &gid_attr); if (!ret) { if (gid_attr.ndev) { work->path[0].ifindex = gid_attr.ndev->if_index; work->path[0].net = dev_net(gid_attr.ndev); dev_put(gid_attr.ndev); } work->path[0].gid_type = gid_attr.gid_type; ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, cm_id_priv); } if (ret) { int err = ib_get_cached_gid(work->port->cm_dev->ib_device, work->port->port_num, 0, &work->path[0].sgid, &gid_attr); if (!err && gid_attr.ndev) { work->path[0].ifindex = gid_attr.ndev->if_index; work->path[0].net = dev_net(gid_attr.ndev); dev_put(gid_attr.ndev); } work->path[0].gid_type = gid_attr.gid_type; ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, &work->path[0].sgid, sizeof work->path[0].sgid, NULL, 0); goto rejected; } if (req_msg->alt_local_lid) { ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, cm_id_priv); if (ret) { ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, &work->path[0].sgid, sizeof work->path[0].sgid, NULL, 0); goto rejected; } } cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( cm_req_get_local_resp_timeout(req_msg)); cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg); cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg); cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg); cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg); cm_id_priv->pkey = req_msg->pkey; cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg); cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); cm_process_work(cm_id_priv, work); cm_deref_id(listen_cm_id_priv); return 0; rejected: atomic_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); destroy: ib_destroy_cm_id(cm_id); return ret; } static void cm_format_rep(struct cm_rep_msg *rep_msg, struct cm_id_private *cm_id_priv, struct ib_cm_rep_param *param) { cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); rep_msg->local_comm_id = cm_id_priv->id.local_id; rep_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); rep_msg->resp_resources = param->responder_resources; cm_rep_set_target_ack_delay(rep_msg, cm_id_priv->av.port->cm_dev->ack_delay); cm_rep_set_failover(rep_msg, param->failover_accepted); cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) { rep_msg->initiator_depth = param->initiator_depth; cm_rep_set_flow_ctrl(rep_msg, param->flow_control); cm_rep_set_srq(rep_msg, param->srq); cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); } else { cm_rep_set_srq(rep_msg, 1); cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num)); } if (param->private_data && param->private_data_len) memcpy(rep_msg->private_data, param->private_data, param->private_data_len); } int ib_send_cm_rep(struct ib_cm_id *cm_id, struct ib_cm_rep_param *param) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; struct cm_rep_msg *rep_msg; unsigned long flags; int ret; if (param->private_data && param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_REQ_RCVD && cm_id->state != IB_CM_MRA_REQ_SENT) { ret = -EINVAL; goto out; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; rep_msg = (struct cm_rep_msg *) msg->mad; cm_format_rep(rep_msg, cm_id_priv, param); msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->state = IB_CM_REP_SENT; cm_id_priv->msg = msg; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_rep); static void cm_format_rtu(struct cm_rtu_msg *rtu_msg, struct cm_id_private *cm_id_priv, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid); rtu_msg->local_comm_id = cm_id_priv->id.local_id; rtu_msg->remote_comm_id = cm_id_priv->id.remote_id; if (private_data && private_data_len) memcpy(rtu_msg->private_data, private_data, private_data_len); } int ib_send_cm_rtu(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; void *data; int ret; if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE) return -EINVAL; data = cm_copy_private_data(private_data, private_data_len); if (IS_ERR(data)) return PTR_ERR(data); cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_REP_RCVD && cm_id->state != IB_CM_MRA_REP_SENT) { ret = -EINVAL; goto error; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto error; cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); kfree(data); return ret; } cm_id->state = IB_CM_ESTABLISHED; cm_set_private_data(cm_id_priv, data, private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); return ret; } EXPORT_SYMBOL(ib_send_cm_rtu); static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type) { struct cm_rep_msg *rep_msg; struct ib_cm_rep_event_param *param; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.rep_rcvd; param->remote_ca_guid = rep_msg->local_ca_guid; param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type)); param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); param->responder_resources = rep_msg->initiator_depth; param->initiator_depth = rep_msg->resp_resources; param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); param->failover_accepted = cm_rep_get_failover(rep_msg); param->flow_control = cm_rep_get_flow_ctrl(rep_msg); param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); param->srq = cm_rep_get_srq(rep_msg); work->cm_event.private_data = &rep_msg->private_data; } static void cm_dup_rep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; struct ib_mad_send_buf *msg = NULL; int ret; rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, rep_msg->local_comm_id); if (!cm_id_priv) return; atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_REP_COUNTER]); ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); if (ret) goto deref; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state == IB_CM_ESTABLISHED) cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, cm_id_priv->private_data, cm_id_priv->private_data_len); else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT) cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout, cm_id_priv->private_data, cm_id_priv->private_data_len); else goto unlock; spin_unlock_irq(&cm_id_priv->lock); ret = ib_post_send_mad(msg, NULL); if (ret) goto free; goto deref; unlock: spin_unlock_irq(&cm_id_priv->lock); free: cm_free_msg(msg); deref: cm_deref_id(cm_id_priv); } static int cm_rep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; int ret; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); if (!cm_id_priv) { cm_dup_rep_handler(work); return -EINVAL; } cm_format_rep_event(work, cm_id_priv->qp_type); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: break; default: spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto error; } cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); spin_lock(&cm.lock); /* Check for duplicate REP. */ if (cm_insert_remote_id(cm_id_priv->timewait_info)) { spin_unlock(&cm.lock); spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto error; } /* Check for a stale connection. */ if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { rb_erase(&cm_id_priv->timewait_info->remote_id_node, &cm.remote_id_table); cm_id_priv->timewait_info->inserted_remote_id = 0; spin_unlock(&cm.lock); spin_unlock_irq(&cm_id_priv->lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); ret = -EINVAL; goto error; } spin_unlock(&cm.lock); cm_id_priv->id.state = IB_CM_REP_RCVD; cm_id_priv->id.remote_id = rep_msg->local_comm_id; cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); cm_id_priv->initiator_depth = rep_msg->resp_resources; cm_id_priv->responder_resources = rep_msg->initiator_depth; cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); cm_id_priv->av.timeout = cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->av.timeout - 1); cm_id_priv->alt_av.timeout = cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->alt_av.timeout - 1); /* todo: handle peer_to_peer */ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; error: cm_deref_id(cm_id_priv); return ret; } static int cm_establish_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; int ret; /* See comment in cm_establish about lookup. */ cm_id_priv = cm_acquire_id(work->local_id, work->remote_id); if (!cm_id_priv) return -EINVAL; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { spin_unlock_irq(&cm_id_priv->lock); goto out; } ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static int cm_rtu_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rtu_msg *rtu_msg; int ret; rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id, rtu_msg->local_comm_id); if (!cm_id_priv) return -EINVAL; work->cm_event.private_data = &rtu_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_REP_SENT && cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { spin_unlock_irq(&cm_id_priv->lock); atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_RTU_COUNTER]); goto out; } cm_id_priv->id.state = IB_CM_ESTABLISHED; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, struct cm_id_private *cm_id_priv, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ)); dreq_msg->local_comm_id = cm_id_priv->id.local_id; dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); if (private_data && private_data_len) memcpy(dreq_msg->private_data, private_data, private_data_len); } int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_ESTABLISHED) { ret = -EINVAL; goto out; } if (cm_id->lap_state == IB_CM_LAP_SENT || cm_id->lap_state == IB_CM_MRA_LAP_RCVD) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) { cm_enter_timewait(cm_id_priv); goto out; } cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, private_data, private_data_len); msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; ret = ib_post_send_mad(msg, NULL); if (ret) { cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->state = IB_CM_DREQ_SENT; cm_id_priv->msg = msg; out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_dreq); static void cm_format_drep(struct cm_drep_msg *drep_msg, struct cm_id_private *cm_id_priv, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid); drep_msg->local_comm_id = cm_id_priv->id.local_id; drep_msg->remote_comm_id = cm_id_priv->id.remote_id; if (private_data && private_data_len) memcpy(drep_msg->private_data, private_data, private_data_len); } int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; void *data; int ret; if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE) return -EINVAL; data = cm_copy_private_data(private_data, private_data_len); if (IS_ERR(data)) return PTR_ERR(data); cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_DREQ_RCVD) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); return -EINVAL; } cm_set_private_data(cm_id_priv, data, private_data_len); cm_enter_timewait(cm_id_priv); ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_drep); static int cm_issue_drep(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_mad_send_buf *msg = NULL; struct cm_dreq_msg *dreq_msg; struct cm_drep_msg *drep_msg; int ret; ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); if (ret) return ret; dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad; drep_msg = (struct cm_drep_msg *) msg->mad; cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid); drep_msg->remote_comm_id = dreq_msg->local_comm_id; drep_msg->local_comm_id = dreq_msg->remote_comm_id; ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); return ret; } static int cm_dreq_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_dreq_msg *dreq_msg; struct ib_mad_send_buf *msg = NULL; int ret; dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, dreq_msg->local_comm_id); if (!cm_id_priv) { atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); cm_issue_drep(work->port, work->mad_recv_wc); return -EINVAL; } work->cm_event.private_data = &dreq_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg)) goto unlock; switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); break; case IB_CM_ESTABLISHED: if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); break; case IB_CM_MRA_REP_RCVD: break; case IB_CM_TIMEWAIT: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, cm_id_priv->private_data, cm_id_priv->private_data_len); spin_unlock_irq(&cm_id_priv->lock); if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; case IB_CM_DREQ_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); goto unlock; default: goto unlock; } cm_id_priv->id.state = IB_CM_DREQ_RCVD; cm_id_priv->tid = dreq_msg->hdr.tid; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; unlock: spin_unlock_irq(&cm_id_priv->lock); deref: cm_deref_id(cm_id_priv); return -EINVAL; } static int cm_drep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_drep_msg *drep_msg; int ret; drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id, drep_msg->local_comm_id); if (!cm_id_priv) return -EINVAL; work->cm_event.private_data = &drep_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_DREQ_SENT && cm_id_priv->id.state != IB_CM_DREQ_RCVD) { spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_enter_timewait(cm_id_priv); ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason, void *ari, u8 ari_length, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) || (ari && ari_length > IB_CM_REJ_ARI_LENGTH)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id->state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: ret = cm_alloc_msg(cm_id_priv, &msg); if (!ret) cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, reason, ari, ari_length, private_data, private_data_len); cm_reset_to_idle(cm_id_priv); break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ret = cm_alloc_msg(cm_id_priv, &msg); if (!ret) cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, reason, ari, ari_length, private_data, private_data_len); cm_enter_timewait(cm_id_priv); break; default: ret = -EINVAL; goto out; } if (ret) goto out; ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_rej); static void cm_format_rej_event(struct cm_work *work) { struct cm_rej_msg *rej_msg; struct ib_cm_rej_event_param *param; rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.rej_rcvd; param->ari = rej_msg->ari; param->ari_length = cm_rej_get_reject_info_len(rej_msg); param->reason = __be16_to_cpu(rej_msg->reason); work->cm_event.private_data = &rej_msg->private_data; } static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) { struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; __be32 remote_id; remote_id = rej_msg->local_comm_id; if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) { spin_lock_irq(&cm.lock); timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari), remote_id); if (!timewait_info) { spin_unlock_irq(&cm.lock); return NULL; } cm_id_priv = idr_find(&cm.local_id_table, (__force int) (timewait_info->work.local_id ^ cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); else cm_id_priv = NULL; } spin_unlock_irq(&cm.lock); } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ) cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0); else cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id); return cm_id_priv; } static int cm_rej_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rej_msg *rej_msg; int ret; rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_rejected_id(rej_msg); if (!cm_id_priv) return -EINVAL; cm_format_rej_event(work); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN) cm_enter_timewait(cm_id_priv); else cm_reset_to_idle(cm_id_priv); break; case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* fall through */ case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: cm_enter_timewait(cm_id_priv); break; case IB_CM_ESTABLISHED: if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT || cm_id_priv->id.lap_state == IB_CM_LAP_SENT) { if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_enter_timewait(cm_id_priv); break; } /* fall through */ default: spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto out; } ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } int ib_send_cm_mra(struct ib_cm_id *cm_id, u8 service_timeout, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; enum ib_cm_state cm_state; enum ib_cm_lap_state lap_state; enum cm_msg_response msg_response; void *data; unsigned long flags; int ret; if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE) return -EINVAL; data = cm_copy_private_data(private_data, private_data_len); if (IS_ERR(data)) return PTR_ERR(data); cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); switch(cm_id_priv->id.state) { case IB_CM_REQ_RCVD: cm_state = IB_CM_MRA_REQ_SENT; lap_state = cm_id->lap_state; msg_response = CM_MSG_RESPONSE_REQ; break; case IB_CM_REP_RCVD: cm_state = IB_CM_MRA_REP_SENT; lap_state = cm_id->lap_state; msg_response = CM_MSG_RESPONSE_REP; break; case IB_CM_ESTABLISHED: if (cm_id->lap_state == IB_CM_LAP_RCVD) { cm_state = cm_id->state; lap_state = IB_CM_MRA_LAP_SENT; msg_response = CM_MSG_RESPONSE_OTHER; break; } default: ret = -EINVAL; goto error1; } if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) { ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto error1; cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, msg_response, service_timeout, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; } cm_id->state = cm_state; cm_id->lap_state = lap_state; cm_id_priv->service_timeout = service_timeout; cm_set_private_data(cm_id_priv, data, private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); return ret; error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); cm_free_msg(msg); return ret; } EXPORT_SYMBOL(ib_send_cm_mra); static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg) { switch (cm_mra_get_msg_mraed(mra_msg)) { case CM_MSG_RESPONSE_REQ: return cm_acquire_id(mra_msg->remote_comm_id, 0); case CM_MSG_RESPONSE_REP: case CM_MSG_RESPONSE_OTHER: return cm_acquire_id(mra_msg->remote_comm_id, mra_msg->local_comm_id); default: return NULL; } } static int cm_mra_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_mra_msg *mra_msg; int timeout, ret; mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_mraed_id(mra_msg); if (!cm_id_priv) return -EINVAL; work->cm_event.private_data = &mra_msg->private_data; work->cm_event.param.mra_rcvd.service_timeout = cm_mra_get_service_timeout(mra_msg); timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + cm_convert_to_ms(cm_id_priv->av.timeout); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; break; case IB_CM_REP_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; break; case IB_CM_ESTABLISHED: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) { if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) atomic_long_inc(&work->port-> counter_group[CM_RECV_DUPLICATES]. counter[CM_MRA_COUNTER]); goto out; } cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; break; case IB_CM_MRA_REQ_RCVD: case IB_CM_MRA_REP_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_MRA_COUNTER]); /* fall through */ default: goto out; } cm_id_priv->msg->context[1] = (void *) (unsigned long) cm_id_priv->id.state; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: spin_unlock_irq(&cm_id_priv->lock); cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_format_lap(struct cm_lap_msg *lap_msg, struct cm_id_private *cm_id_priv, struct ib_sa_path_rec *alternate_path, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); lap_msg->local_comm_id = cm_id_priv->id.local_id; lap_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); /* todo: need remote CM response timeout */ cm_lap_set_remote_resp_timeout(lap_msg, 0x1F); lap_msg->alt_local_lid = alternate_path->slid; lap_msg->alt_remote_lid = alternate_path->dlid; lap_msg->alt_local_gid = alternate_path->sgid; lap_msg->alt_remote_gid = alternate_path->dgid; cm_lap_set_flow_label(lap_msg, alternate_path->flow_label); cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class); lap_msg->alt_hop_limit = alternate_path->hop_limit; cm_lap_set_packet_rate(lap_msg, alternate_path->rate); cm_lap_set_sl(lap_msg, alternate_path->sl); cm_lap_set_subnet_local(lap_msg, 1); /* local only... */ cm_lap_set_local_ack_timeout(lap_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, alternate_path->packet_life_time)); if (private_data && private_data_len) memcpy(lap_msg->private_data, private_data, private_data_len); } int ib_send_cm_lap(struct ib_cm_id *cm_id, struct ib_sa_path_rec *alternate_path, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_ESTABLISHED || (cm_id->lap_state != IB_CM_LAP_UNINIT && cm_id->lap_state != IB_CM_LAP_IDLE)) { ret = -EINVAL; goto out; } ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, cm_id_priv); if (ret) goto out; cm_id_priv->alt_av.timeout = cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->alt_av.timeout - 1); ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, alternate_path, private_data, private_data_len); msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->lap_state = IB_CM_LAP_SENT; cm_id_priv->msg = msg; out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_lap); static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, struct ib_sa_path_rec *path, struct cm_lap_msg *lap_msg) { memset(path, 0, sizeof *path); path->dgid = lap_msg->alt_local_gid; path->sgid = lap_msg->alt_remote_gid; path->dlid = lap_msg->alt_local_lid; path->slid = lap_msg->alt_remote_lid; path->flow_label = cm_lap_get_flow_label(lap_msg); path->hop_limit = lap_msg->alt_hop_limit; path->traffic_class = cm_lap_get_traffic_class(lap_msg); path->reversible = 1; path->pkey = cm_id_priv->pkey; path->sl = cm_lap_get_sl(lap_msg); path->mtu_selector = IB_SA_EQ; path->mtu = cm_id_priv->path_mtu; path->rate_selector = IB_SA_EQ; path->rate = cm_lap_get_packet_rate(lap_msg); path->packet_life_time_selector = IB_SA_EQ; path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg); path->packet_life_time -= (path->packet_life_time > 0); } static int cm_lap_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_lap_msg *lap_msg; struct ib_cm_lap_event_param *param; struct ib_mad_send_buf *msg = NULL; int ret; /* todo: verify LAP request and send reject APR if invalid. */ lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id, lap_msg->local_comm_id); if (!cm_id_priv) return -EINVAL; param = &work->cm_event.param.lap_rcvd; param->alternate_path = &work->path[0]; cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); work->cm_event.private_data = &lap_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) goto unlock; switch (cm_id_priv->id.lap_state) { case IB_CM_LAP_UNINIT: case IB_CM_LAP_IDLE: break; case IB_CM_MRA_LAP_SENT: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_LAP_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_OTHER, cm_id_priv->service_timeout, cm_id_priv->private_data, cm_id_priv->private_data_len); spin_unlock_irq(&cm_id_priv->lock); if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; case IB_CM_LAP_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_LAP_COUNTER]); goto unlock; default: goto unlock; } cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, cm_id_priv); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; unlock: spin_unlock_irq(&cm_id_priv->lock); deref: cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_format_apr(struct cm_apr_msg *apr_msg, struct cm_id_private *cm_id_priv, enum ib_cm_apr_status status, void *info, u8 info_length, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid); apr_msg->local_comm_id = cm_id_priv->id.local_id; apr_msg->remote_comm_id = cm_id_priv->id.remote_id; apr_msg->ap_status = (u8) status; if (info && info_length) { apr_msg->info_length = info_length; memcpy(apr_msg->info, info, info_length); } if (private_data && private_data_len) memcpy(apr_msg->private_data, private_data, private_data_len); } int ib_send_cm_apr(struct ib_cm_id *cm_id, enum ib_cm_apr_status status, void *info, u8 info_length, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) || (info && info_length > IB_CM_APR_INFO_LENGTH)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_ESTABLISHED || (cm_id->lap_state != IB_CM_LAP_RCVD && cm_id->lap_state != IB_CM_MRA_LAP_SENT)) { ret = -EINVAL; goto out; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, info, info_length, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->lap_state = IB_CM_LAP_IDLE; out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_apr); static int cm_apr_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_apr_msg *apr_msg; int ret; apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id, apr_msg->local_comm_id); if (!cm_id_priv) return -EINVAL; /* Unmatched reply. */ work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status; work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info; work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length; work->cm_event.private_data = &apr_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED || (cm_id_priv->id.lap_state != IB_CM_LAP_SENT && cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) { spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_id_priv->msg = NULL; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static int cm_timewait_handler(struct cm_work *work) { struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; int ret; timewait_info = (struct cm_timewait_info *)work; spin_lock_irq(&cm.lock); list_del(&timewait_info->list); spin_unlock_irq(&cm.lock); cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); if (!cm_id_priv) return -EINVAL; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_TIMEWAIT || cm_id_priv->remote_qpn != timewait_info->remote_qpn) { spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_IDLE; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, struct cm_id_private *cm_id_priv, struct ib_cm_sidr_req_param *param) { cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); sidr_req_msg->request_id = cm_id_priv->id.local_id; sidr_req_msg->pkey = param->path->pkey; sidr_req_msg->service_id = param->service_id; if (param->private_data && param->private_data_len) memcpy(sidr_req_msg->private_data, param->private_data, param->private_data_len); } int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, struct ib_cm_sidr_req_param *param) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if (!param->path || (param->private_data && param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); if (ret) goto out; cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; cm_id_priv->max_cm_retries = param->max_cm_retries; ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, param); msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state == IB_CM_IDLE) ret = ib_post_send_mad(msg, NULL); else ret = -EINVAL; if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); goto out; } cm_id->state = IB_CM_SIDR_REQ_SENT; cm_id_priv->msg = msg; spin_unlock_irqrestore(&cm_id_priv->lock, flags); out: return ret; } EXPORT_SYMBOL(ib_send_cm_sidr_req); static void cm_format_sidr_req_event(struct cm_work *work, + const struct cm_id_private *rx_cm_id, struct ib_cm_id *listen_id) { struct cm_sidr_req_msg *sidr_req_msg; struct ib_cm_sidr_req_event_param *param; sidr_req_msg = (struct cm_sidr_req_msg *) work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.sidr_req_rcvd; param->pkey = __be16_to_cpu(sidr_req_msg->pkey); param->listen_id = listen_id; param->service_id = sidr_req_msg->service_id; param->bth_pkey = cm_get_bth_pkey(work); param->port = work->port->port_num; + param->sgid_index = rx_cm_id->av.ah_attr.grh.sgid_index; work->cm_event.private_data = &sidr_req_msg->private_data; } static int cm_sidr_req_handler(struct cm_work *work) { struct ib_cm_id *cm_id; struct cm_id_private *cm_id_priv, *cur_cm_id_priv; struct cm_sidr_req_msg *sidr_req_msg; struct ib_wc *wc; cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); /* Record SGID/SLID and request ID for lookup. */ sidr_req_msg = (struct cm_sidr_req_msg *) work->mad_recv_wc->recv_buf.mad; wc = work->mad_recv_wc->wc; cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid); cm_id_priv->av.dgid.global.interface_id = 0; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->id.remote_id = sidr_req_msg->request_id; cm_id_priv->tid = sidr_req_msg->hdr.tid; atomic_inc(&cm_id_priv->work_count); spin_lock_irq(&cm.lock); cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); if (cur_cm_id_priv) { spin_unlock_irq(&cm.lock); atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_SIDR_REQ_COUNTER]); goto out; /* Duplicate message. */ } cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; cur_cm_id_priv = cm_find_listen(cm_id->device, sidr_req_msg->service_id); if (!cur_cm_id_priv) { spin_unlock_irq(&cm.lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); goto out; /* No match. */ } atomic_inc(&cur_cm_id_priv->refcount); atomic_inc(&cm_id_priv->refcount); spin_unlock_irq(&cm.lock); cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; cm_id_priv->id.context = cur_cm_id_priv->id.context; cm_id_priv->id.service_id = sidr_req_msg->service_id; cm_id_priv->id.service_mask = ~cpu_to_be64(0); - cm_format_sidr_req_event(work, &cur_cm_id_priv->id); + cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id); cm_process_work(cm_id_priv, work); cm_deref_id(cur_cm_id_priv); return 0; out: ib_destroy_cm_id(&cm_id_priv->id); return -EINVAL; } static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, struct cm_id_private *cm_id_priv, struct ib_cm_sidr_rep_param *param) { cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, cm_id_priv->tid); sidr_rep_msg->request_id = cm_id_priv->id.remote_id; sidr_rep_msg->status = param->status; cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num)); sidr_rep_msg->service_id = cm_id_priv->id.service_id; sidr_rep_msg->qkey = cpu_to_be32(param->qkey); if (param->info && param->info_length) memcpy(sidr_rep_msg->info, param->info, param->info_length); if (param->private_data && param->private_data_len) memcpy(sidr_rep_msg->private_data, param->private_data, param->private_data_len); } int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_param *param) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) || (param->private_data && param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_SIDR_REQ_RCVD) { ret = -EINVAL; goto error; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto error; cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, param); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->state = IB_CM_IDLE; spin_unlock_irqrestore(&cm_id_priv->lock, flags); spin_lock_irqsave(&cm.lock, flags); if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); RB_CLEAR_NODE(&cm_id_priv->sidr_id_node); } spin_unlock_irqrestore(&cm.lock, flags); return 0; error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_sidr_rep); static void cm_format_sidr_rep_event(struct cm_work *work) { struct cm_sidr_rep_msg *sidr_rep_msg; struct ib_cm_sidr_rep_event_param *param; sidr_rep_msg = (struct cm_sidr_rep_msg *) work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.sidr_rep_rcvd; param->status = sidr_rep_msg->status; param->qkey = be32_to_cpu(sidr_rep_msg->qkey); param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); param->info = &sidr_rep_msg->info; param->info_len = sidr_rep_msg->info_length; work->cm_event.private_data = &sidr_rep_msg->private_data; } static int cm_sidr_rep_handler(struct cm_work *work) { struct cm_sidr_rep_msg *sidr_rep_msg; struct cm_id_private *cm_id_priv; sidr_rep_msg = (struct cm_sidr_rep_msg *) work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0); if (!cm_id_priv) return -EINVAL; /* Unmatched reply. */ spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) { spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); cm_format_sidr_rep_event(work); cm_process_work(cm_id_priv, work); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_process_send_error(struct ib_mad_send_buf *msg, enum ib_wc_status wc_status) { struct cm_id_private *cm_id_priv; struct ib_cm_event cm_event; enum ib_cm_state state; int ret; memset(&cm_event, 0, sizeof cm_event); cm_id_priv = msg->context[0]; /* Discard old sends or ones without a response. */ spin_lock_irq(&cm_id_priv->lock); state = (enum ib_cm_state) (unsigned long) msg->context[1]; if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) goto discard; switch (state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: cm_reset_to_idle(cm_id_priv); cm_event.event = IB_CM_REQ_ERROR; break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: cm_reset_to_idle(cm_id_priv); cm_event.event = IB_CM_REP_ERROR; break; case IB_CM_DREQ_SENT: cm_enter_timewait(cm_id_priv); cm_event.event = IB_CM_DREQ_ERROR; break; case IB_CM_SIDR_REQ_SENT: cm_id_priv->id.state = IB_CM_IDLE; cm_event.event = IB_CM_SIDR_REQ_ERROR; break; default: goto discard; } spin_unlock_irq(&cm_id_priv->lock); cm_event.param.send_status = wc_status; /* No other events can occur on the cm_id at this point. */ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event); cm_free_msg(msg); if (ret) ib_destroy_cm_id(&cm_id_priv->id); return; discard: spin_unlock_irq(&cm_id_priv->lock); cm_free_msg(msg); } static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_send_buf *msg = mad_send_wc->send_buf; struct cm_port *port; u16 attr_index; port = mad_agent->context; attr_index = be16_to_cpu(((struct ib_mad_hdr *) msg->mad)->attr_id) - CM_ATTR_ID_OFFSET; /* * If the send was in response to a received message (context[0] is not * set to a cm_id), and is not a REJ, then it is a send that was * manually retried. */ if (!msg->context[0] && (attr_index != CM_REJ_COUNTER)) msg->retries = 1; atomic_long_add(1 + msg->retries, &port->counter_group[CM_XMIT].counter[attr_index]); if (msg->retries) atomic_long_add(msg->retries, &port->counter_group[CM_XMIT_RETRIES]. counter[attr_index]); switch (mad_send_wc->status) { case IB_WC_SUCCESS: case IB_WC_WR_FLUSH_ERR: cm_free_msg(msg); break; default: if (msg->context[0] && msg->context[1]) cm_process_send_error(msg, mad_send_wc->status); else cm_free_msg(msg); break; } } static void cm_work_handler(struct work_struct *_work) { struct cm_work *work = container_of(_work, struct cm_work, work.work); int ret; switch (work->cm_event.event) { case IB_CM_REQ_RECEIVED: ret = cm_req_handler(work); break; case IB_CM_MRA_RECEIVED: ret = cm_mra_handler(work); break; case IB_CM_REJ_RECEIVED: ret = cm_rej_handler(work); break; case IB_CM_REP_RECEIVED: ret = cm_rep_handler(work); break; case IB_CM_RTU_RECEIVED: ret = cm_rtu_handler(work); break; case IB_CM_USER_ESTABLISHED: ret = cm_establish_handler(work); break; case IB_CM_DREQ_RECEIVED: ret = cm_dreq_handler(work); break; case IB_CM_DREP_RECEIVED: ret = cm_drep_handler(work); break; case IB_CM_SIDR_REQ_RECEIVED: ret = cm_sidr_req_handler(work); break; case IB_CM_SIDR_REP_RECEIVED: ret = cm_sidr_rep_handler(work); break; case IB_CM_LAP_RECEIVED: ret = cm_lap_handler(work); break; case IB_CM_APR_RECEIVED: ret = cm_apr_handler(work); break; case IB_CM_TIMEWAIT_EXIT: ret = cm_timewait_handler(work); break; default: ret = -EINVAL; break; } if (ret) cm_free_work(work); } static int cm_establish(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; struct cm_work *work; unsigned long flags; int ret = 0; struct cm_device *cm_dev; cm_dev = ib_get_client_data(cm_id->device, &cm_client); if (!cm_dev) return -ENODEV; work = kmalloc(sizeof *work, GFP_ATOMIC); if (!work) return -ENOMEM; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id->state) { case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: cm_id->state = IB_CM_ESTABLISHED; break; case IB_CM_ESTABLISHED: ret = -EISCONN; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (ret) { kfree(work); goto out; } /* * The CM worker thread may try to destroy the cm_id before it * can execute this work item. To prevent potential deadlock, * we need to find the cm_id once we're in the context of the * worker thread, rather than holding a reference on it. */ INIT_DELAYED_WORK(&work->work, cm_work_handler); work->local_id = cm_id->local_id; work->remote_id = cm_id->remote_id; work->mad_recv_wc = NULL; work->cm_event.event = IB_CM_USER_ESTABLISHED; /* Check if the device started its remove_one */ spin_lock_irqsave(&cm.lock, flags); if (!cm_dev->going_down) { queue_delayed_work(cm.wq, &work->work, 0); } else { kfree(work); ret = -ENODEV; } spin_unlock_irqrestore(&cm.lock, flags); out: return ret; } static int cm_migrate(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; struct cm_av tmp_av; unsigned long flags; int tmp_send_port_not_ready; int ret = 0; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state == IB_CM_ESTABLISHED && (cm_id->lap_state == IB_CM_LAP_UNINIT || cm_id->lap_state == IB_CM_LAP_IDLE)) { cm_id->lap_state = IB_CM_LAP_IDLE; /* Swap address vector */ tmp_av = cm_id_priv->av; cm_id_priv->av = cm_id_priv->alt_av; cm_id_priv->alt_av = tmp_av; /* Swap port send ready state */ tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready; cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready; cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready; } else ret = -EINVAL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event) { int ret; switch (event) { case IB_EVENT_COMM_EST: ret = cm_establish(cm_id); break; case IB_EVENT_PATH_MIG: ret = cm_migrate(cm_id); break; default: ret = -EINVAL; } return ret; } EXPORT_SYMBOL(ib_cm_notify); static void cm_recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, struct ib_mad_recv_wc *mad_recv_wc) { struct cm_port *port = mad_agent->context; struct cm_work *work; enum ib_cm_event_type event; u16 attr_id; int paths = 0; int going_down = 0; switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { case CM_REQ_ATTR_ID: paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)-> alt_local_lid != 0); event = IB_CM_REQ_RECEIVED; break; case CM_MRA_ATTR_ID: event = IB_CM_MRA_RECEIVED; break; case CM_REJ_ATTR_ID: event = IB_CM_REJ_RECEIVED; break; case CM_REP_ATTR_ID: event = IB_CM_REP_RECEIVED; break; case CM_RTU_ATTR_ID: event = IB_CM_RTU_RECEIVED; break; case CM_DREQ_ATTR_ID: event = IB_CM_DREQ_RECEIVED; break; case CM_DREP_ATTR_ID: event = IB_CM_DREP_RECEIVED; break; case CM_SIDR_REQ_ATTR_ID: event = IB_CM_SIDR_REQ_RECEIVED; break; case CM_SIDR_REP_ATTR_ID: event = IB_CM_SIDR_REP_RECEIVED; break; case CM_LAP_ATTR_ID: paths = 1; event = IB_CM_LAP_RECEIVED; break; case CM_APR_ATTR_ID: event = IB_CM_APR_RECEIVED; break; default: ib_free_recv_mad(mad_recv_wc); return; } attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id); atomic_long_inc(&port->counter_group[CM_RECV]. counter[attr_id - CM_ATTR_ID_OFFSET]); work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, GFP_KERNEL); if (!work) { ib_free_recv_mad(mad_recv_wc); return; } INIT_DELAYED_WORK(&work->work, cm_work_handler); work->cm_event.event = event; work->mad_recv_wc = mad_recv_wc; work->port = port; /* Check if the device started its remove_one */ spin_lock_irq(&cm.lock); if (!port->cm_dev->going_down) queue_delayed_work(cm.wq, &work->work, 0); else going_down = 1; spin_unlock_irq(&cm.lock); if (going_down) { kfree(work); ib_free_recv_mad(mad_recv_wc); } } static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; if (cm_id_priv->responder_resources) qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC; qp_attr->pkey_index = cm_id_priv->av.pkey_index; qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->id.state) { case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); if (cm_id_priv->qp_type == IB_QPT_RC || cm_id_priv->qp_type == IB_QPT_XRC_TGT) { *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources; qp_attr->min_rnr_timer = 0; } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; } ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->id.state) { /* Allow transition to RTS before sending REP */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) { *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); switch (cm_id_priv->qp_type) { case IB_QPT_RC: case IB_QPT_XRC_INI: *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC; qp_attr->retry_cnt = cm_id_priv->retry_count; qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; /* fall through */ case IB_QPT_XRC_TGT: *qp_attr_mask |= IB_QP_TIMEOUT; qp_attr->timeout = cm_id_priv->av.timeout; break; default: break; } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_PATH_MIG_STATE; qp_attr->path_mig_state = IB_MIG_REARM; } } else { *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; qp_attr->path_mig_state = IB_MIG_REARM; } ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct cm_id_private *cm_id_priv; int ret; cm_id_priv = container_of(cm_id, struct cm_id_private, id); switch (qp_attr->qp_state) { case IB_QPS_INIT: ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask); break; case IB_QPS_RTR: ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask); break; case IB_QPS_RTS: ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); break; default: ret = -EINVAL; break; } return ret; } EXPORT_SYMBOL(ib_cm_init_qp_attr); static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, char *buf) { struct cm_counter_group *group; struct cm_counter_attribute *cm_attr; group = container_of(obj, struct cm_counter_group, obj); cm_attr = container_of(attr, struct cm_counter_attribute, attr); return sprintf(buf, "%ld\n", atomic_long_read(&group->counter[cm_attr->index])); } static const struct sysfs_ops cm_counter_ops = { .show = cm_show_counter }; static struct kobj_type cm_counter_obj_type = { .sysfs_ops = &cm_counter_ops, .default_attrs = cm_counter_default_attrs }; static void cm_release_port_obj(struct kobject *obj) { struct cm_port *cm_port; cm_port = container_of(obj, struct cm_port, port_obj); kfree(cm_port); } static struct kobj_type cm_port_obj_type = { .release = cm_release_port_obj }; static char *cm_devnode(struct device *dev, umode_t *mode) { if (mode) *mode = 0666; return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } struct class cm_class = { .owner = THIS_MODULE, .name = "infiniband_cm", .devnode = cm_devnode, }; EXPORT_SYMBOL(cm_class); static int cm_create_port_fs(struct cm_port *port) { int i, ret; ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, &port->cm_dev->device->kobj, "%d", port->port_num); if (ret) { kfree(port); return ret; } for (i = 0; i < CM_COUNTER_GROUPS; i++) { ret = kobject_init_and_add(&port->counter_group[i].obj, &cm_counter_obj_type, &port->port_obj, "%s", counter_group_names[i]); if (ret) goto error; } return 0; error: while (i--) kobject_put(&port->counter_group[i].obj); kobject_put(&port->port_obj); return ret; } static void cm_remove_port_fs(struct cm_port *port) { int i; for (i = 0; i < CM_COUNTER_GROUPS; i++) kobject_put(&port->counter_group[i].obj); kobject_put(&port->port_obj); } static void cm_add_one(struct ib_device *ib_device) { struct cm_device *cm_dev; struct cm_port *port; struct ib_mad_reg_req reg_req = { .mgmt_class = IB_MGMT_CLASS_CM, .mgmt_class_version = IB_CM_CLASS_VERSION, }; struct ib_port_modify port_modify = { .set_port_cap_mask = IB_PORT_CM_SUP }; unsigned long flags; int ret; int count = 0; u8 i; cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) * ib_device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) return; cm_dev->ib_device = ib_device; cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; cm_dev->going_down = 0; cm_dev->device = device_create(&cm_class, &ib_device->dev, MKDEV(0, 0), NULL, "%s", ib_device->name); if (IS_ERR(cm_dev->device)) { kfree(cm_dev); return; } set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= ib_device->phys_port_cnt; i++) { if (!rdma_cap_ib_cm(ib_device, i)) continue; port = kzalloc(sizeof *port, GFP_KERNEL); if (!port) goto error1; cm_dev->port[i-1] = port; port->cm_dev = cm_dev; port->port_num = i; INIT_LIST_HEAD(&port->cm_priv_prim_list); INIT_LIST_HEAD(&port->cm_priv_altr_list); ret = cm_create_port_fs(port); if (ret) goto error1; port->mad_agent = ib_register_mad_agent(ib_device, i, IB_QPT_GSI, ®_req, 0, cm_send_handler, cm_recv_handler, port, 0); if (IS_ERR(port->mad_agent)) goto error2; ret = ib_modify_port(ib_device, i, 0, &port_modify); if (ret) goto error3; count++; } if (!count) goto free; ib_set_client_data(ib_device, &cm_client, cm_dev); write_lock_irqsave(&cm.device_lock, flags); list_add_tail(&cm_dev->list, &cm.device_list); write_unlock_irqrestore(&cm.device_lock, flags); return; error3: ib_unregister_mad_agent(port->mad_agent); error2: cm_remove_port_fs(port); error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; while (--i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } free: device_unregister(cm_dev->device); kfree(cm_dev); } static void cm_remove_one(struct ib_device *ib_device, void *client_data) { struct cm_device *cm_dev = client_data; struct cm_port *port; struct cm_id_private *cm_id_priv; struct ib_mad_agent *cur_mad_agent; struct ib_port_modify port_modify = { .clr_port_cap_mask = IB_PORT_CM_SUP }; unsigned long flags; int i; if (!cm_dev) return; write_lock_irqsave(&cm.device_lock, flags); list_del(&cm_dev->list); write_unlock_irqrestore(&cm.device_lock, flags); spin_lock_irq(&cm.lock); cm_dev->going_down = 1; spin_unlock_irq(&cm.lock); for (i = 1; i <= ib_device->phys_port_cnt; i++) { if (!rdma_cap_ib_cm(ib_device, i)) continue; port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); /* Mark all the cm_id's as not valid */ spin_lock_irq(&cm.lock); list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list) cm_id_priv->altr_send_port_not_ready = 1; list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list) cm_id_priv->prim_send_port_not_ready = 1; spin_unlock_irq(&cm.lock); /* * We flush the queue here after the going_down set, this * verify that no new works will be queued in the recv handler, * after that we can call the unregister_mad_agent */ flush_workqueue(cm.wq); spin_lock_irq(&cm.state_lock); cur_mad_agent = port->mad_agent; port->mad_agent = NULL; spin_unlock_irq(&cm.state_lock); ib_unregister_mad_agent(cur_mad_agent); cm_remove_port_fs(port); } device_unregister(cm_dev->device); kfree(cm_dev); } static int __init ib_cm_init(void) { int ret; memset(&cm, 0, sizeof cm); INIT_LIST_HEAD(&cm.device_list); rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); spin_lock_init(&cm.state_lock); cm.listen_service_table = RB_ROOT; cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); cm.remote_id_table = RB_ROOT; cm.remote_qp_table = RB_ROOT; cm.remote_sidr_table = RB_ROOT; idr_init(&cm.local_id_table); get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); INIT_LIST_HEAD(&cm.timewait_list); ret = class_register(&cm_class); if (ret) { ret = -ENOMEM; goto error1; } cm.wq = create_workqueue("ib_cm"); if (!cm.wq) { ret = -ENOMEM; goto error2; } ret = ib_register_client(&cm_client); if (ret) goto error3; return 0; error3: destroy_workqueue(cm.wq); error2: class_unregister(&cm_class); error1: idr_destroy(&cm.local_id_table); return ret; } static void __exit ib_cm_cleanup(void) { struct cm_timewait_info *timewait_info, *tmp; spin_lock_irq(&cm.lock); list_for_each_entry(timewait_info, &cm.timewait_list, list) cancel_delayed_work(&timewait_info->work.work); spin_unlock_irq(&cm.lock); ib_unregister_client(&cm_client); destroy_workqueue(cm.wq); list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) { cancel_delayed_work_sync(&timewait_info->work.work); list_del(&timewait_info->list); kfree(timewait_info); } class_unregister(&cm_class); idr_destroy(&cm.local_id_table); } module_init_order(ib_cm_init, SI_ORDER_SECOND); module_exit_order(ib_cm_cleanup, SI_ORDER_FIRST); Index: head/sys/ofed/drivers/infiniband/core/ib_cma.c =================================================================== --- head/sys/ofed/drivers/infiniband/core/ib_cma.c (revision 338540) +++ head/sys/ofed/drivers/infiniband/core/ib_cma.c (revision 338541) @@ -1,4387 +1,4413 @@ /*- * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 * * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #define LINUXKPI_PARAM_PREFIX ibcore_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "core_priv.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("Generic RDMA CM Agent"); MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 static const char * const cma_events[] = { [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", [RDMA_CM_EVENT_ADDR_ERROR] = "address error", [RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ", [RDMA_CM_EVENT_ROUTE_ERROR] = "route error", [RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request", [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response", [RDMA_CM_EVENT_CONNECT_ERROR] = "connect error", [RDMA_CM_EVENT_UNREACHABLE] = "unreachable", [RDMA_CM_EVENT_REJECTED] = "rejected", [RDMA_CM_EVENT_ESTABLISHED] = "established", [RDMA_CM_EVENT_DISCONNECTED] = "disconnected", [RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal", [RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join", [RDMA_CM_EVENT_MULTICAST_ERROR] = "multicast error", [RDMA_CM_EVENT_ADDR_CHANGE] = "address change", [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", }; const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { size_t index = event; return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ? cma_events[index] : "unrecognized event"; } EXPORT_SYMBOL(rdma_event_msg); static int cma_check_linklocal(struct rdma_dev_addr *, struct sockaddr *); static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device, void *client_data); static struct ib_client cma_client = { .name = "cma", .add = cma_add_one, .remove = cma_remove_one }; static struct ib_sa_client sa_client; static struct rdma_addr_client addr_client; static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; struct cma_pernet { struct idr tcp_ps; struct idr udp_ps; struct idr ipoib_ps; struct idr ib_ps; }; VNET_DEFINE(struct cma_pernet, cma_pernet); static struct cma_pernet *cma_pernet_ptr(struct vnet *vnet) { struct cma_pernet *retval; CURVNET_SET_QUIET(vnet); retval = &VNET(cma_pernet); CURVNET_RESTORE(); return (retval); } static struct idr *cma_pernet_idr(struct vnet *net, enum rdma_port_space ps) { struct cma_pernet *pernet = cma_pernet_ptr(net); switch (ps) { case RDMA_PS_TCP: return &pernet->tcp_ps; case RDMA_PS_UDP: return &pernet->udp_ps; case RDMA_PS_IPOIB: return &pernet->ipoib_ps; case RDMA_PS_IB: return &pernet->ib_ps; default: return NULL; } } struct cma_device { struct list_head list; struct ib_device *device; struct completion comp; atomic_t refcount; struct list_head id_list; struct sysctl_ctx_list sysctl_ctx; enum ib_gid_type *default_gid_type; }; struct rdma_bind_list { enum rdma_port_space ps; struct hlist_head owners; unsigned short port; }; struct class_port_info_context { struct ib_class_port_info *class_port_info; struct ib_device *device; struct completion done; struct ib_sa_query *sa_query; u8 port_num; }; static int cma_ps_alloc(struct vnet *vnet, enum rdma_port_space ps, struct rdma_bind_list *bind_list, int snum) { struct idr *idr = cma_pernet_idr(vnet, ps); return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); } static struct rdma_bind_list *cma_ps_find(struct vnet *net, enum rdma_port_space ps, int snum) { struct idr *idr = cma_pernet_idr(net, ps); return idr_find(idr, snum); } static void cma_ps_remove(struct vnet *net, enum rdma_port_space ps, int snum) { struct idr *idr = cma_pernet_idr(net, ps); idr_remove(idr, snum); } enum { CMA_OPTION_AFONLY, }; void cma_ref_dev(struct cma_device *cma_dev) { atomic_inc(&cma_dev->refcount); } struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, void *cookie) { struct cma_device *cma_dev; struct cma_device *found_cma_dev = NULL; mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) if (filter(cma_dev->device, cookie)) { found_cma_dev = cma_dev; break; } if (found_cma_dev) cma_ref_dev(found_cma_dev); mutex_unlock(&lock); return found_cma_dev; } int cma_get_default_gid_type(struct cma_device *cma_dev, unsigned int port) { if (port < rdma_start_port(cma_dev->device) || port > rdma_end_port(cma_dev->device)) return -EINVAL; return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)]; } int cma_set_default_gid_type(struct cma_device *cma_dev, unsigned int port, enum ib_gid_type default_gid_type) { unsigned long supported_gids; if (port < rdma_start_port(cma_dev->device) || port > rdma_end_port(cma_dev->device)) return -EINVAL; supported_gids = roce_gid_type_mask_support(cma_dev->device, port); if (!(supported_gids & 1 << default_gid_type)) return -EINVAL; cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] = default_gid_type; return 0; } struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) { return cma_dev->device; } /* * Device removal can occur at anytime, so we need extra handling to * serialize notifying the user of device removal with other callbacks. * We do this by disabling removal notification while a callback is in process, * and reporting it after the callback completes. */ struct rdma_id_private { struct rdma_cm_id id; struct rdma_bind_list *bind_list; struct hlist_node node; struct list_head list; /* listen_any_list or cma_device.list */ struct list_head listen_list; /* per device listens */ struct cma_device *cma_dev; struct list_head mc_list; int internal_id; enum rdma_cm_state state; spinlock_t lock; struct mutex qp_mutex; struct completion comp; atomic_t refcount; struct mutex handler_mutex; int backlog; int timeout_ms; struct ib_sa_query *query; int query_id; union { struct ib_cm_id *ib; struct iw_cm_id *iw; } cm_id; u32 seq_num; u32 qkey; u32 qp_num; pid_t owner; u32 options; u8 srq; u8 tos; u8 reuseaddr; u8 afonly; enum ib_gid_type gid_type; }; struct cma_multicast { struct rdma_id_private *id_priv; union { struct ib_sa_multicast *ib; } multicast; struct list_head list; void *context; struct sockaddr_storage addr; struct kref mcref; bool igmp_joined; u8 join_state; }; struct cma_work { struct work_struct work; struct rdma_id_private *id; enum rdma_cm_state old_state; enum rdma_cm_state new_state; struct rdma_cm_event event; }; struct cma_ndev_work { struct work_struct work; struct rdma_id_private *id; struct rdma_cm_event event; }; struct iboe_mcast_work { struct work_struct work; struct rdma_id_private *id; struct cma_multicast *mc; }; union cma_ip_addr { struct in6_addr ip6; struct { __be32 pad[3]; __be32 addr; } ip4; }; struct cma_hdr { u8 cma_version; u8 ip_version; /* IP version: 7:4 */ __be16 port; union cma_ip_addr src_addr; union cma_ip_addr dst_addr; }; #define CMA_VERSION 0x00 struct cma_req_info { struct ib_device *device; int port; union ib_gid local_gid; __be64 service_id; u16 pkey; bool has_gid:1; }; static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) { unsigned long flags; int ret; spin_lock_irqsave(&id_priv->lock, flags); ret = (id_priv->state == comp); spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } static int cma_comp_exch(struct rdma_id_private *id_priv, enum rdma_cm_state comp, enum rdma_cm_state exch) { unsigned long flags; int ret; spin_lock_irqsave(&id_priv->lock, flags); if ((ret = (id_priv->state == comp))) id_priv->state = exch; spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, enum rdma_cm_state exch) { unsigned long flags; enum rdma_cm_state old; spin_lock_irqsave(&id_priv->lock, flags); old = id_priv->state; id_priv->state = exch; spin_unlock_irqrestore(&id_priv->lock, flags); return old; } static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) { return hdr->ip_version >> 4; } static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) { hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); } static int cma_igmp_send(struct net_device *ndev, const union ib_gid *mgid, bool join) { int retval; if (ndev) { union { struct sockaddr sock; struct sockaddr_storage storage; } addr; rdma_gid2ip(&addr.sock, mgid); CURVNET_SET_QUIET(ndev->if_vnet); if (join) retval = -if_addmulti(ndev, &addr.sock, NULL); else retval = -if_delmulti(ndev, &addr.sock); CURVNET_RESTORE(); } else { retval = -ENODEV; } return retval; } static void _cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { cma_ref_dev(cma_dev); id_priv->cma_dev = cma_dev; id_priv->gid_type = 0; id_priv->id.device = cma_dev->device; id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); } static void cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { _cma_attach_to_dev(id_priv, cma_dev); id_priv->gid_type = cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(cma_dev->device)]; } void cma_deref_dev(struct cma_device *cma_dev) { if (atomic_dec_and_test(&cma_dev->refcount)) complete(&cma_dev->comp); } static inline void release_mc(struct kref *kref) { struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); kfree(mc->multicast.ib); kfree(mc); } static void cma_release_dev(struct rdma_id_private *id_priv) { mutex_lock(&lock); list_del(&id_priv->list); cma_deref_dev(id_priv->cma_dev); id_priv->cma_dev = NULL; mutex_unlock(&lock); } static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) { return (struct sockaddr *) &id_priv->id.route.addr.src_addr; } static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) { return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; } static inline unsigned short cma_family(struct rdma_id_private *id_priv) { return id_priv->id.route.addr.src_addr.ss_family; } static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) { struct ib_sa_mcmember_rec rec; int ret = 0; if (id_priv->qkey) { if (qkey && id_priv->qkey != qkey) return -EINVAL; return 0; } if (qkey) { id_priv->qkey = qkey; return 0; } switch (id_priv->id.ps) { case RDMA_PS_UDP: case RDMA_PS_IB: id_priv->qkey = RDMA_UDP_QKEY; break; case RDMA_PS_IPOIB: ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid); ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, &rec.mgid, &rec); if (!ret) id_priv->qkey = be32_to_cpu(rec.qkey); break; default: break; } return ret; } static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) { dev_addr->dev_type = ARPHRD_INFINIBAND; rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr); ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey)); } static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) { int ret; if (addr->sa_family != AF_IB) { ret = rdma_translate_ip(addr, dev_addr); } else { cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); ret = 0; } return ret; } static inline int cma_validate_port(struct ib_device *device, u8 port, enum ib_gid_type gid_type, union ib_gid *gid, const struct rdma_dev_addr *dev_addr) { const int dev_type = dev_addr->dev_type; struct net_device *ndev; int ret = -ENODEV; if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) return ret; if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) return ret; if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); } else { ndev = NULL; gid_type = IB_GID_TYPE_IB; } ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, ndev, NULL); if (ndev) dev_put(ndev); return ret; } static int cma_acquire_dev(struct rdma_id_private *id_priv, struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; union ib_gid gid, iboe_gid, *gidp; int ret = -ENODEV; u8 port; if (dev_addr->dev_type != ARPHRD_INFINIBAND && id_priv->id.ps == RDMA_PS_IPOIB) return -EINVAL; mutex_lock(&lock); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &iboe_gid); memcpy(&gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof gid); if (listen_id_priv) { cma_dev = listen_id_priv->cma_dev; port = listen_id_priv->id.port_num; gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; ret = cma_validate_port(cma_dev->device, port, rdma_protocol_ib(cma_dev->device, port) ? IB_GID_TYPE_IB : listen_id_priv->gid_type, gidp, dev_addr); if (!ret) { id_priv->id.port_num = port; goto out; } } list_for_each_entry(cma_dev, &dev_list, list) { for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { if (listen_id_priv && listen_id_priv->cma_dev == cma_dev && listen_id_priv->id.port_num == port) continue; gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; ret = cma_validate_port(cma_dev->device, port, rdma_protocol_ib(cma_dev->device, port) ? IB_GID_TYPE_IB : cma_dev->default_gid_type[port - 1], gidp, dev_addr); if (!ret) { id_priv->id.port_num = port; goto out; } } } out: if (!ret) cma_attach_to_dev(id_priv, cma_dev); mutex_unlock(&lock); return ret; } /* * Select the source IB device and address to reach the destination IB address. */ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) { struct cma_device *cma_dev, *cur_dev; struct sockaddr_ib *addr; union ib_gid gid, sgid, *dgid; u16 pkey, index; u8 p; int i; cma_dev = NULL; addr = (struct sockaddr_ib *) cma_dst_addr(id_priv); dgid = (union ib_gid *) &addr->sib_addr; pkey = ntohs(addr->sib_pkey); list_for_each_entry(cur_dev, &dev_list, list) { for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { if (!rdma_cap_af_ib(cur_dev->device, p)) continue; if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) continue; for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid, NULL); i++) { if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; sgid = gid; id_priv->id.port_num = p; goto found; } if (!cma_dev && (gid.global.subnet_prefix == dgid->global.subnet_prefix)) { cma_dev = cur_dev; sgid = gid; id_priv->id.port_num = p; } } } } if (!cma_dev) return -ENODEV; found: cma_attach_to_dev(id_priv, cma_dev); addr = (struct sockaddr_ib *) cma_src_addr(id_priv); memcpy(&addr->sib_addr, &sgid, sizeof sgid); cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); return 0; } static void cma_deref_id(struct rdma_id_private *id_priv) { if (atomic_dec_and_test(&id_priv->refcount)) complete(&id_priv->comp); } struct rdma_cm_id *rdma_create_id(struct vnet *net, rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, enum ib_qp_type qp_type) { struct rdma_id_private *id_priv; +#ifdef VIMAGE + if (net == NULL) + return ERR_PTR(-EINVAL); +#endif id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); if (!id_priv) return ERR_PTR(-ENOMEM); id_priv->owner = task_pid_nr(current); id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; id_priv->id.ps = ps; id_priv->id.qp_type = qp_type; spin_lock_init(&id_priv->lock); mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); atomic_set(&id_priv->refcount, 1); mutex_init(&id_priv->handler_mutex); INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); - id_priv->id.route.addr.dev_addr.net = TD_TO_VNET(curthread); + id_priv->id.route.addr.dev_addr.net = net; return &id_priv->id; } EXPORT_SYMBOL(rdma_create_id); static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; qp_attr.qp_state = IB_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) return ret; ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); if (ret) return ret; qp_attr.qp_state = IB_QPS_RTR; ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); if (ret) return ret; qp_attr.qp_state = IB_QPS_RTS; qp_attr.sq_psn = 0; ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); return ret; } static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; qp_attr.qp_state = IB_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) return ret; return ib_modify_qp(qp, &qp_attr, qp_attr_mask); } int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { struct rdma_id_private *id_priv; struct ib_qp *qp; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id->device != pd->device) return -EINVAL; qp_init_attr->port_num = id->port_num; qp = ib_create_qp(pd, qp_init_attr); if (IS_ERR(qp)) return PTR_ERR(qp); if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); else ret = cma_init_conn_qp(id_priv, qp); if (ret) goto err; id->qp = qp; id_priv->qp_num = qp->qp_num; id_priv->srq = (qp->srq != NULL); return 0; err: ib_destroy_qp(qp); return ret; } EXPORT_SYMBOL(rdma_create_qp); void rdma_destroy_qp(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); mutex_lock(&id_priv->qp_mutex); ib_destroy_qp(id_priv->id.qp); id_priv->id.qp = NULL; mutex_unlock(&id_priv->qp_mutex); } EXPORT_SYMBOL(rdma_destroy_qp); static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; union ib_gid sgid; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { ret = 0; goto out; } /* Need to update QP attributes from default values. */ qp_attr.qp_state = IB_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); if (ret) goto out; qp_attr.qp_state = IB_QPS_RTR; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, qp_attr.ah_attr.grh.sgid_index, &sgid, NULL); if (ret) goto out; BUG_ON(id_priv->cma_dev->device != id_priv->id.device); if (conn_param) qp_attr.max_dest_rd_atomic = conn_param->responder_resources; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); return ret; } static int cma_modify_qp_rts(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { ret = 0; goto out; } qp_attr.qp_state = IB_QPS_RTS; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; if (conn_param) qp_attr.max_rd_atomic = conn_param->initiator_depth; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); return ret; } static int cma_modify_qp_err(struct rdma_id_private *id_priv) { struct ib_qp_attr qp_attr; int ret; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { ret = 0; goto out; } qp_attr.qp_state = IB_QPS_ERR; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); out: mutex_unlock(&id_priv->qp_mutex); return ret; } static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int ret; u16 pkey; if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) pkey = 0xffff; else pkey = ib_addr_get_pkey(dev_addr); ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, pkey, &qp_attr->pkey_index); if (ret) return ret; qp_attr->port_num = id_priv->id.port_num; *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; if (id_priv->id.qp_type == IB_QPT_UD) { ret = cma_set_qkey(id_priv, 0); if (ret) return ret; qp_attr->qkey = id_priv->qkey; *qp_attr_mask |= IB_QP_QKEY; } else { qp_attr->qp_access_flags = 0; *qp_attr_mask |= IB_QP_ACCESS_FLAGS; } return 0; } int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct rdma_id_private *id_priv; int ret = 0; id_priv = container_of(id, struct rdma_id_private, id); if (rdma_cap_ib_cm(id->device, id->port_num)) { if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); else ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, qp_attr_mask); if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; } else if (rdma_cap_iw_cm(id->device, id->port_num)) { if (!id_priv->cm_id.iw) { qp_attr->qp_access_flags = 0; *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, qp_attr_mask); qp_attr->port_num = id_priv->id.port_num; *qp_attr_mask |= IB_QP_PORT; } else ret = -ENOSYS; return ret; } EXPORT_SYMBOL(rdma_init_qp_attr); static inline int cma_zero_addr(struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); case AF_INET6: return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); case AF_IB: return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); default: return 0; } } static inline int cma_loopback_addr(struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); case AF_INET6: return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); case AF_IB: return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); default: return 0; } } static inline int cma_any_addr(struct sockaddr *addr) { return cma_zero_addr(addr) || cma_loopback_addr(addr); } static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) { if (src->sa_family != dst->sa_family) return -1; switch (src->sa_family) { case AF_INET: return ((struct sockaddr_in *) src)->sin_addr.s_addr != ((struct sockaddr_in *) dst)->sin_addr.s_addr; case AF_INET6: return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, &((struct sockaddr_in6 *) dst)->sin6_addr); default: return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, &((struct sockaddr_ib *) dst)->sib_addr); } } static __be16 cma_port(struct sockaddr *addr) { struct sockaddr_ib *sib; switch (addr->sa_family) { case AF_INET: return ((struct sockaddr_in *) addr)->sin_port; case AF_INET6: return ((struct sockaddr_in6 *) addr)->sin6_port; case AF_IB: sib = (struct sockaddr_ib *) addr; return htons((u16) (be64_to_cpu(sib->sib_sid) & be64_to_cpu(sib->sib_sid_mask))); default: return 0; } } static inline int cma_any_port(struct sockaddr *addr) { return !cma_port(addr); } static void cma_save_ib_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_cm_id *listen_id, struct ib_sa_path_rec *path) { struct sockaddr_ib *listen_ib, *ib; listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; if (src_addr) { ib = (struct sockaddr_ib *)src_addr; ib->sib_family = AF_IB; if (path) { ib->sib_pkey = path->pkey; ib->sib_flowinfo = path->flow_label; memcpy(&ib->sib_addr, &path->sgid, 16); ib->sib_sid = path->service_id; ib->sib_scope_id = 0; } else { ib->sib_pkey = listen_ib->sib_pkey; ib->sib_flowinfo = listen_ib->sib_flowinfo; ib->sib_addr = listen_ib->sib_addr; ib->sib_sid = listen_ib->sib_sid; ib->sib_scope_id = listen_ib->sib_scope_id; } ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); } if (dst_addr) { ib = (struct sockaddr_ib *)dst_addr; ib->sib_family = AF_IB; if (path) { ib->sib_pkey = path->pkey; ib->sib_flowinfo = path->flow_label; memcpy(&ib->sib_addr, &path->dgid, 16); } } } static void cma_save_ip4_info(struct sockaddr_in *src_addr, struct sockaddr_in *dst_addr, struct cma_hdr *hdr, __be16 local_port) { if (src_addr) { *src_addr = (struct sockaddr_in) { .sin_len = sizeof(struct sockaddr_in), .sin_family = AF_INET, .sin_addr.s_addr = hdr->dst_addr.ip4.addr, .sin_port = local_port, }; } if (dst_addr) { *dst_addr = (struct sockaddr_in) { .sin_len = sizeof(struct sockaddr_in), .sin_family = AF_INET, .sin_addr.s_addr = hdr->src_addr.ip4.addr, .sin_port = hdr->port, }; } } static void cma_ip6_clear_scope_id(struct in6_addr *addr) { /* make sure link local scope ID gets zeroed */ if (IN6_IS_SCOPE_LINKLOCAL(addr) || IN6_IS_ADDR_MC_INTFACELOCAL(addr)) { /* use byte-access to be alignment safe */ addr->s6_addr[2] = 0; addr->s6_addr[3] = 0; } } static void cma_save_ip6_info(struct sockaddr_in6 *src_addr, struct sockaddr_in6 *dst_addr, struct cma_hdr *hdr, __be16 local_port) { if (src_addr) { *src_addr = (struct sockaddr_in6) { .sin6_len = sizeof(struct sockaddr_in6), .sin6_family = AF_INET6, .sin6_addr = hdr->dst_addr.ip6, .sin6_port = local_port, }; cma_ip6_clear_scope_id(&src_addr->sin6_addr); } if (dst_addr) { *dst_addr = (struct sockaddr_in6) { .sin6_len = sizeof(struct sockaddr_in6), .sin6_family = AF_INET6, .sin6_addr = hdr->src_addr.ip6, .sin6_port = hdr->port, }; cma_ip6_clear_scope_id(&dst_addr->sin6_addr); } } static u16 cma_port_from_service_id(__be64 service_id) { return (u16)be64_to_cpu(service_id); } static int cma_save_ip_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, struct ib_cm_event *ib_event, __be64 service_id) { struct cma_hdr *hdr; __be16 port; hdr = ib_event->private_data; if (hdr->cma_version != CMA_VERSION) return -EINVAL; port = htons(cma_port_from_service_id(service_id)); switch (cma_get_ip_ver(hdr)) { case 4: cma_save_ip4_info((struct sockaddr_in *)src_addr, (struct sockaddr_in *)dst_addr, hdr, port); break; case 6: cma_save_ip6_info((struct sockaddr_in6 *)src_addr, (struct sockaddr_in6 *)dst_addr, hdr, port); break; default: return -EAFNOSUPPORT; } return 0; } static int cma_save_net_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event, sa_family_t sa_family, __be64 service_id) { if (sa_family == AF_IB) { if (ib_event->event == IB_CM_REQ_RECEIVED) cma_save_ib_info(src_addr, dst_addr, listen_id, ib_event->param.req_rcvd.primary_path); else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) cma_save_ib_info(src_addr, dst_addr, listen_id, NULL); return 0; } return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id); } static int cma_save_req_info(const struct ib_cm_event *ib_event, struct cma_req_info *req) { const struct ib_cm_req_event_param *req_param = &ib_event->param.req_rcvd; const struct ib_cm_sidr_req_event_param *sidr_param = &ib_event->param.sidr_req_rcvd; switch (ib_event->event) { case IB_CM_REQ_RECEIVED: req->device = req_param->listen_id->device; req->port = req_param->port; memcpy(&req->local_gid, &req_param->primary_path->sgid, sizeof(req->local_gid)); req->has_gid = true; req->service_id = req_param->primary_path->service_id; req->pkey = be16_to_cpu(req_param->primary_path->pkey); if (req->pkey != req_param->bth_pkey) pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" "RDMA CMA: in the future this may cause the request to be dropped\n", req_param->bth_pkey, req->pkey); break; case IB_CM_SIDR_REQ_RECEIVED: req->device = sidr_param->listen_id->device; req->port = sidr_param->port; req->has_gid = false; req->service_id = sidr_param->service_id; req->pkey = sidr_param->pkey; if (req->pkey != sidr_param->bth_pkey) pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" "RDMA CMA: in the future this may cause the request to be dropped\n", sidr_param->bth_pkey, req->pkey); break; default: return -EINVAL; } return 0; } static bool validate_ipv4_net_dev(struct net_device *net_dev, const struct sockaddr_in *dst_addr, const struct sockaddr_in *src_addr) { #ifdef INET struct sockaddr_in src_tmp = *src_addr; __be32 daddr = dst_addr->sin_addr.s_addr, saddr = src_addr->sin_addr.s_addr; struct net_device *dst_dev; struct rtentry *rte; bool ret; if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) || ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) || ipv4_is_loopback(saddr)) return false; dst_dev = ip_dev_find(net_dev->if_vnet, daddr); if (dst_dev != net_dev) { if (dst_dev != NULL) dev_put(dst_dev); return false; } dev_put(dst_dev); /* * Make sure the socket address length field * is set, else rtalloc1() will fail. */ src_tmp.sin_len = sizeof(src_tmp); CURVNET_SET(net_dev->if_vnet); rte = rtalloc1((struct sockaddr *)&src_tmp, 1, 0); if (rte != NULL) { ret = (rte->rt_ifp == net_dev); RTFREE_LOCKED(rte); } else { ret = false; } CURVNET_RESTORE(); return ret; #else return false; #endif } static bool validate_ipv6_net_dev(struct net_device *net_dev, const struct sockaddr_in6 *dst_addr, const struct sockaddr_in6 *src_addr) { #ifdef INET6 struct sockaddr_in6 src_tmp = *src_addr; struct in6_addr in6_addr = dst_addr->sin6_addr; struct net_device *dst_dev; struct rtentry *rte; bool ret; dst_dev = ip6_dev_find(net_dev->if_vnet, in6_addr); if (dst_dev != net_dev) { if (dst_dev != NULL) dev_put(dst_dev); return false; } CURVNET_SET(net_dev->if_vnet); /* * Make sure the socket address length field * is set, else rtalloc1() will fail. */ src_tmp.sin6_len = sizeof(src_tmp); /* * Make sure the scope ID gets embedded, else rtalloc1() will * resolve to the loopback interface. */ src_tmp.sin6_scope_id = net_dev->if_index; sa6_embedscope(&src_tmp, 0); rte = rtalloc1((struct sockaddr *)&src_tmp, 1, 0); if (rte != NULL) { ret = (rte->rt_ifp == net_dev); RTFREE_LOCKED(rte); } else { ret = false; } CURVNET_RESTORE(); return ret; #else return false; #endif } static bool validate_net_dev(struct net_device *net_dev, const struct sockaddr *daddr, const struct sockaddr *saddr) { const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr; const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr; const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr; const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr; switch (daddr->sa_family) { case AF_INET: return saddr->sa_family == AF_INET && validate_ipv4_net_dev(net_dev, daddr4, saddr4); case AF_INET6: return saddr->sa_family == AF_INET6 && validate_ipv6_net_dev(net_dev, daddr6, saddr6); default: return false; } } +static struct net_device * +roce_get_net_dev_by_cm_event(struct ib_device *device, u8 port_num, + const struct ib_cm_event *ib_event) +{ + struct ib_gid_attr sgid_attr; + union ib_gid sgid; + int err = -EINVAL; + + if (ib_event->event == IB_CM_REQ_RECEIVED) { + err = ib_get_cached_gid(device, port_num, + ib_event->param.req_rcvd.ppath_sgid_index, &sgid, &sgid_attr); + } else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { + err = ib_get_cached_gid(device, port_num, + ib_event->param.sidr_req_rcvd.sgid_index, &sgid, &sgid_attr); + } + if (err) + return (NULL); + return (sgid_attr.ndev); +} + static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, const struct cma_req_info *req) { struct sockaddr_storage listen_addr_storage, src_addr_storage; struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage, *src_addr = (struct sockaddr *)&src_addr_storage; struct net_device *net_dev; const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL; int err; err = cma_save_ip_info(listen_addr, src_addr, ib_event, req->service_id); if (err) return ERR_PTR(err); - net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey, - gid, listen_addr); + if (rdma_protocol_roce(req->device, req->port)) { + net_dev = roce_get_net_dev_by_cm_event(req->device, req->port, + ib_event); + } else { + net_dev = ib_get_net_dev_by_params(req->device, req->port, + req->pkey, + gid, listen_addr); + } if (!net_dev) return ERR_PTR(-ENODEV); if (!validate_net_dev(net_dev, listen_addr, src_addr)) { dev_put(net_dev); return ERR_PTR(-EHOSTUNREACH); } return net_dev; } static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id) { return (be64_to_cpu(service_id) >> 16) & 0xffff; } static bool cma_match_private_data(struct rdma_id_private *id_priv, const struct cma_hdr *hdr) { struct sockaddr *addr = cma_src_addr(id_priv); __be32 ip4_addr; struct in6_addr ip6_addr; if (cma_any_addr(addr) && !id_priv->afonly) return true; switch (addr->sa_family) { case AF_INET: ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr; if (cma_get_ip_ver(hdr) != 4) return false; if (!cma_any_addr(addr) && hdr->dst_addr.ip4.addr != ip4_addr) return false; break; case AF_INET6: ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr; if (cma_get_ip_ver(hdr) != 6) return false; cma_ip6_clear_scope_id(&ip6_addr); if (!cma_any_addr(addr) && memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr))) return false; break; case AF_IB: return true; default: return false; } return true; } static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num) { enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num); enum rdma_transport_type transport = rdma_node_get_transport(device->node_type); return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB; } static bool cma_protocol_roce(const struct rdma_cm_id *id) { struct ib_device *device = id->device; const int port_num = id->port_num ?: rdma_start_port(device); return cma_protocol_roce_dev_port(device, port_num); } static bool cma_match_net_dev(const struct rdma_cm_id *id, const struct net_device *net_dev, u8 port_num) { const struct rdma_addr *addr = &id->route.addr; if (!net_dev) /* This request is an AF_IB request or a RoCE request */ return (!id->port_num || id->port_num == port_num) && (addr->src_addr.ss_family == AF_IB || cma_protocol_roce_dev_port(id->device, port_num)); return !addr->dev_addr.bound_dev_if || (net_eq(dev_net(net_dev), addr->dev_addr.net) && addr->dev_addr.bound_dev_if == net_dev->if_index); } static struct rdma_id_private *cma_find_listener( const struct rdma_bind_list *bind_list, const struct ib_cm_id *cm_id, const struct ib_cm_event *ib_event, const struct cma_req_info *req, const struct net_device *net_dev) { struct rdma_id_private *id_priv, *id_priv_dev; if (!bind_list) return ERR_PTR(-EINVAL); hlist_for_each_entry(id_priv, &bind_list->owners, node) { if (cma_match_private_data(id_priv, ib_event->private_data)) { if (id_priv->id.device == cm_id->device && cma_match_net_dev(&id_priv->id, net_dev, req->port)) return id_priv; list_for_each_entry(id_priv_dev, &id_priv->listen_list, listen_list) { if (id_priv_dev->id.device == cm_id->device && cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) return id_priv_dev; } } } return ERR_PTR(-EINVAL); } static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event, struct net_device **net_dev) { struct cma_req_info req; struct rdma_bind_list *bind_list; struct rdma_id_private *id_priv; int err; err = cma_save_req_info(ib_event, &req); if (err) return ERR_PTR(err); *net_dev = cma_get_net_dev(ib_event, &req); if (IS_ERR(*net_dev)) { if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { /* Assuming the protocol is AF_IB */ - *net_dev = NULL; - } else if (cma_protocol_roce_dev_port(req.device, req.port)) { - /* TODO find the net dev matching the request parameters - * through the RoCE GID table */ *net_dev = NULL; } else { return ERR_CAST(*net_dev); } } bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, rdma_ps_from_service_id(req.service_id), cma_port_from_service_id(req.service_id)); id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); if (IS_ERR(id_priv) && *net_dev) { dev_put(*net_dev); *net_dev = NULL; } return id_priv; } static inline int cma_user_data_offset(struct rdma_id_private *id_priv) { return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); } static void cma_cancel_route(struct rdma_id_private *id_priv) { if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); } } static void cma_cancel_listens(struct rdma_id_private *id_priv) { struct rdma_id_private *dev_id_priv; /* * Remove from listen_any_list to prevent added devices from spawning * additional listen requests. */ mutex_lock(&lock); list_del(&id_priv->list); while (!list_empty(&id_priv->listen_list)) { dev_id_priv = list_entry(id_priv->listen_list.next, struct rdma_id_private, listen_list); /* sync with device removal to avoid duplicate destruction */ list_del_init(&dev_id_priv->list); list_del(&dev_id_priv->listen_list); mutex_unlock(&lock); rdma_destroy_id(&dev_id_priv->id); mutex_lock(&lock); } mutex_unlock(&lock); } static void cma_cancel_operation(struct rdma_id_private *id_priv, enum rdma_cm_state state) { switch (state) { case RDMA_CM_ADDR_QUERY: rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); break; case RDMA_CM_ROUTE_QUERY: cma_cancel_route(id_priv); break; case RDMA_CM_LISTEN: if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) cma_cancel_listens(id_priv); break; default: break; } } static void cma_release_port(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list = id_priv->bind_list; struct vnet *net = id_priv->id.route.addr.dev_addr.net; if (!bind_list) return; mutex_lock(&lock); hlist_del(&id_priv->node); if (hlist_empty(&bind_list->owners)) { cma_ps_remove(net, bind_list->ps, bind_list->port); kfree(bind_list); } mutex_unlock(&lock); } static void cma_leave_mc_groups(struct rdma_id_private *id_priv) { struct cma_multicast *mc; while (!list_empty(&id_priv->mc_list)) { mc = container_of(id_priv->mc_list.next, struct cma_multicast, list); list_del(&mc->list); if (rdma_cap_ib_mcast(id_priv->cma_dev->device, id_priv->id.port_num)) { ib_sa_free_multicast(mc->multicast.ib); kfree(mc); } else { if (mc->igmp_joined) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct net_device *ndev = NULL; if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (ndev) { cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false); dev_put(ndev); } } kref_put(&mc->mcref, release_mc); } } } void rdma_destroy_id(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; enum rdma_cm_state state; id_priv = container_of(id, struct rdma_id_private, id); state = cma_exch(id_priv, RDMA_CM_DESTROYING); cma_cancel_operation(id_priv, state); /* * Wait for any active callback to finish. New callbacks will find * the id_priv state set to destroying and abort. */ mutex_lock(&id_priv->handler_mutex); mutex_unlock(&id_priv->handler_mutex); if (id_priv->cma_dev) { if (rdma_cap_ib_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.ib) ib_destroy_cm_id(id_priv->cm_id.ib); } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.iw) iw_destroy_cm_id(id_priv->cm_id.iw); } cma_leave_mc_groups(id_priv); cma_release_dev(id_priv); } cma_release_port(id_priv); cma_deref_id(id_priv); wait_for_completion(&id_priv->comp); if (id_priv->internal_id) cma_deref_id(id_priv->id.context); kfree(id_priv->id.route.path_rec); kfree(id_priv); } EXPORT_SYMBOL(rdma_destroy_id); static int cma_rep_recv(struct rdma_id_private *id_priv) { int ret; ret = cma_modify_qp_rtr(id_priv, NULL); if (ret) goto reject; ret = cma_modify_qp_rts(id_priv, NULL); if (ret) goto reject; ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); if (ret) goto reject; return 0; reject: cma_modify_qp_err(id_priv); ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); return ret; } static void cma_set_rep_event_data(struct rdma_cm_event *event, struct ib_cm_rep_event_param *rep_data, void *private_data) { event->param.conn.private_data = private_data; event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; event->param.conn.responder_resources = rep_data->responder_resources; event->param.conn.initiator_depth = rep_data->initiator_depth; event->param.conn.flow_control = rep_data->flow_control; event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; event->param.conn.srq = rep_data->srq; event->param.conn.qp_num = rep_data->remote_qpn; } static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; struct rdma_cm_event event; int ret = 0; mutex_lock(&id_priv->handler_mutex); if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && id_priv->state != RDMA_CM_CONNECT) || (ib_event->event == IB_CM_TIMEWAIT_EXIT && id_priv->state != RDMA_CM_DISCONNECT)) goto out; memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_REQ_ERROR: case IB_CM_REP_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -ETIMEDOUT; break; case IB_CM_REP_RECEIVED: if (id_priv->id.qp) { event.status = cma_rep_recv(id_priv); event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR : RDMA_CM_EVENT_ESTABLISHED; } else { event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; } cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, ib_event->private_data); break; case IB_CM_RTU_RECEIVED: case IB_CM_USER_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; break; case IB_CM_DREQ_ERROR: event.status = -ETIMEDOUT; /* fall through */ case IB_CM_DREQ_RECEIVED: case IB_CM_DREP_RECEIVED: if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_DISCONNECT)) goto out; event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IB_CM_TIMEWAIT_EXIT: event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; break; case IB_CM_MRA_RECEIVED: /* ignore event */ goto out; case IB_CM_REJ_RECEIVED: cma_modify_qp_err(id_priv); event.status = ib_event->param.rej_rcvd.reason; event.event = RDMA_CM_EVENT_REJECTED; event.param.conn.private_data = ib_event->private_data; event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; break; default: pr_err("RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: mutex_unlock(&id_priv->handler_mutex); return ret; } static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event, struct net_device *net_dev) { struct rdma_id_private *id_priv; struct rdma_cm_id *id; struct rdma_route *rt; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; const __be64 service_id = ib_event->param.req_rcvd.primary_path->service_id; int ret; id = rdma_create_id(listen_id->route.addr.dev_addr.net, listen_id->event_handler, listen_id->context, listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) return NULL; id_priv = container_of(id, struct rdma_id_private, id); if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, (struct sockaddr *)&id->route.addr.dst_addr, listen_id, ib_event, ss_family, service_id)) goto err; rt = &id->route; rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL); if (!rt->path_rec) goto err; rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; if (net_dev) { ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); if (ret) goto err; } else { if (!cma_protocol_roce(listen_id) && cma_any_addr(cma_src_addr(id_priv))) { rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); } else if (!cma_any_addr(cma_src_addr(id_priv))) { ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); if (ret) goto err; } } rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); id_priv->state = RDMA_CM_CONNECT; return id_priv; err: rdma_destroy_id(id); return NULL; } static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event, struct net_device *net_dev) { struct rdma_id_private *id_priv; struct rdma_cm_id *id; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; struct vnet *net = listen_id->route.addr.dev_addr.net; int ret; id = rdma_create_id(net, listen_id->event_handler, listen_id->context, listen_id->ps, IB_QPT_UD); if (IS_ERR(id)) return NULL; id_priv = container_of(id, struct rdma_id_private, id); if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, (struct sockaddr *)&id->route.addr.dst_addr, listen_id, ib_event, ss_family, ib_event->param.sidr_req_rcvd.service_id)) goto err; if (net_dev) { ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); if (ret) goto err; } else { if (!cma_any_addr(cma_src_addr(id_priv))) { ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr); if (ret) goto err; } } id_priv->state = RDMA_CM_CONNECT; return id_priv; err: rdma_destroy_id(id); return NULL; } static void cma_set_req_event_data(struct rdma_cm_event *event, struct ib_cm_req_event_param *req_data, void *private_data, int offset) { event->param.conn.private_data = (char *)private_data + offset; event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; event->param.conn.responder_resources = req_data->responder_resources; event->param.conn.initiator_depth = req_data->initiator_depth; event->param.conn.flow_control = req_data->flow_control; event->param.conn.retry_count = req_data->retry_count; event->param.conn.rnr_retry_count = req_data->rnr_retry_count; event->param.conn.srq = req_data->srq; event->param.conn.qp_num = req_data->remote_qpn; } static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) { return (((ib_event->event == IB_CM_REQ_RECEIVED) && (ib_event->param.req_rcvd.qp_type == id->qp_type)) || ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && (id->qp_type == IB_QPT_UD)) || (!id->qp_type)); } static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id = NULL; struct rdma_cm_event event; struct net_device *net_dev; int offset, ret; listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); if (IS_ERR(listen_id)) return PTR_ERR(listen_id); if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { ret = -EINVAL; goto net_dev_put; } mutex_lock(&listen_id->handler_mutex); if (listen_id->state != RDMA_CM_LISTEN) { ret = -ECONNABORTED; goto err1; } memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); event.param.ud.private_data = (char *)ib_event->private_data + offset; event.param.ud.private_data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; } else { conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); cma_set_req_event_data(&event, &ib_event->param.req_rcvd, ib_event->private_data, offset); } if (!conn_id) { ret = -ENOMEM; goto err1; } mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); ret = cma_acquire_dev(conn_id, listen_id); if (ret) goto err2; conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; /* * Protect against the user destroying conn_id from another thread * until we're done accessing it. */ atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) goto err3; /* * Acquire mutex to prevent user executing rdma_destroy_id() * while we're accessing the cm_id. */ mutex_lock(&lock); if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); if (net_dev) dev_put(net_dev); return 0; err3: cma_deref_id(conn_id); /* Destroy the CM ID by returning a non-zero value. */ conn_id->cm_id.ib = NULL; err2: cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); err1: mutex_unlock(&listen_id->handler_mutex); if (conn_id) rdma_destroy_id(&conn_id->id); net_dev_put: if (net_dev) dev_put(net_dev); return ret; } __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) { if (addr->sa_family == AF_IB) return ((struct sockaddr_ib *) addr)->sib_sid; return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); } EXPORT_SYMBOL(rdma_get_service_id); static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) { struct rdma_id_private *id_priv = iw_id->context; struct rdma_cm_event event; int ret = 0; struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; mutex_lock(&id_priv->handler_mutex); if (id_priv->state != RDMA_CM_CONNECT) goto out; memset(&event, 0, sizeof event); switch (iw_event->event) { case IW_CM_EVENT_CLOSE: event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IW_CM_EVENT_CONNECT_REPLY: memcpy(cma_src_addr(id_priv), laddr, rdma_addr_size(laddr)); memcpy(cma_dst_addr(id_priv), raddr, rdma_addr_size(raddr)); switch (iw_event->status) { case 0: event.event = RDMA_CM_EVENT_ESTABLISHED; event.param.conn.initiator_depth = iw_event->ird; event.param.conn.responder_resources = iw_event->ord; break; case -ECONNRESET: case -ECONNREFUSED: event.event = RDMA_CM_EVENT_REJECTED; break; case -ETIMEDOUT: event.event = RDMA_CM_EVENT_UNREACHABLE; break; default: event.event = RDMA_CM_EVENT_CONNECT_ERROR; break; } break; case IW_CM_EVENT_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; event.param.conn.initiator_depth = iw_event->ird; event.param.conn.responder_resources = iw_event->ord; break; default: BUG_ON(1); } event.status = iw_event->status; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.iw = NULL; cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: mutex_unlock(&id_priv->handler_mutex); return ret; } static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct iw_cm_event *iw_event) { struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event; int ret = -ECONNABORTED; struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; listen_id = cm_id->context; mutex_lock(&listen_id->handler_mutex); if (listen_id->state != RDMA_CM_LISTEN) goto out; /* Create a new RDMA id for the new IW CM ID */ new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, listen_id->id.event_handler, listen_id->id.context, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(new_cm_id)) { ret = -ENOMEM; goto out; } conn_id = container_of(new_cm_id, struct rdma_id_private, id); mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); conn_id->state = RDMA_CM_CONNECT; ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } ret = cma_acquire_dev(conn_id, listen_id); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } conn_id->cm_id.iw = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_iw_handler; memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; event.param.conn.initiator_depth = iw_event->ird; event.param.conn.responder_resources = iw_event->ord; /* * Protect against the user destroying conn_id from another thread * until we're done accessing it. */ atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) { /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); goto out; } mutex_unlock(&conn_id->handler_mutex); cma_deref_id(conn_id); out: mutex_unlock(&listen_id->handler_mutex); return ret; } static int cma_ib_listen(struct rdma_id_private *id_priv) { struct sockaddr *addr; struct ib_cm_id *id; __be64 svc_id; addr = cma_src_addr(id_priv); svc_id = rdma_get_service_id(&id_priv->id, addr); id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); if (IS_ERR(id)) return PTR_ERR(id); id_priv->cm_id.ib = id; return 0; } static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) { int ret; struct iw_cm_id *id; id = iw_create_cm_id(id_priv->id.device, iw_conn_req_handler, id_priv); if (IS_ERR(id)) return PTR_ERR(id); id->tos = id_priv->tos; id_priv->cm_id.iw = id; memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); ret = iw_cm_listen(id_priv->cm_id.iw, backlog); if (ret) { iw_destroy_cm_id(id_priv->cm_id.iw); id_priv->cm_id.iw = NULL; } return ret; } static int cma_listen_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct rdma_id_private *id_priv = id->context; id->context = id_priv->id.context; id->event_handler = id_priv->id.event_handler; return id_priv->id.event_handler(id, event); } static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { struct rdma_id_private *dev_id_priv; struct rdma_cm_id *id; struct vnet *net = id_priv->id.route.addr.dev_addr.net; int ret; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type); if (IS_ERR(id)) return; dev_id_priv = container_of(id, struct rdma_id_private, id); dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); _cma_attach_to_dev(dev_id_priv, cma_dev); list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); atomic_inc(&id_priv->refcount); dev_id_priv->internal_id = 1; dev_id_priv->afonly = id_priv->afonly; ret = rdma_listen(id, id_priv->backlog); if (ret) pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", ret, cma_dev->device->name); } static void cma_listen_on_all(struct rdma_id_private *id_priv) { struct cma_device *cma_dev; mutex_lock(&lock); list_add_tail(&id_priv->list, &listen_any_list); list_for_each_entry(cma_dev, &dev_list, list) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); } void rdma_set_service_type(struct rdma_cm_id *id, int tos) { struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); id_priv->tos = (u8) tos; } EXPORT_SYMBOL(rdma_set_service_type); static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, void *context) { struct cma_work *work = context; struct rdma_route *route; route = &work->id->id.route; if (!status) { route->num_paths = 1; *route->path_rec = *path_rec; } else { work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; work->event.status = status; } queue_work(cma_wq, &work->work); } static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct cma_work *work) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct ib_sa_path_rec path_rec; ib_sa_comp_mask comp_mask; struct sockaddr_in6 *sin6; struct sockaddr_ib *sib; memset(&path_rec, 0, sizeof path_rec); rdma_addr_get_sgid(dev_addr, &path_rec.sgid); rdma_addr_get_dgid(dev_addr, &path_rec.dgid); path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; switch (cma_family(id_priv)) { case AF_INET: path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); comp_mask |= IB_SA_PATH_REC_QOS_CLASS; break; case AF_INET6: sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; break; case AF_IB: sib = (struct sockaddr_ib *) cma_src_addr(id_priv); path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; break; } id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, id_priv->id.port_num, &path_rec, comp_mask, timeout_ms, GFP_KERNEL, cma_query_handler, work, &id_priv->query); return (id_priv->query_id < 0) ? id_priv->query_id : 0; } static void cma_work_handler(struct work_struct *_work) { struct cma_work *work = container_of(_work, struct cma_work, work); struct rdma_id_private *id_priv = work->id; int destroy = 0; mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) goto out; if (id_priv->id.event_handler(&id_priv->id, &work->event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } out: mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); if (destroy) rdma_destroy_id(&id_priv->id); kfree(work); } static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) { struct rdma_route *route = &id_priv->id.route; struct cma_work *work; int ret; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); if (!route->path_rec) { ret = -ENOMEM; goto err1; } ret = cma_query_ib_route(id_priv, timeout_ms, work); if (ret) goto err2; return 0; err2: kfree(route->path_rec); route->path_rec = NULL; err1: kfree(work); return ret; } int rdma_set_ib_paths(struct rdma_cm_id *id, struct ib_sa_path_rec *path_rec, int num_paths) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_RESOLVED)) return -EINVAL; id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, GFP_KERNEL); if (!id->route.path_rec) { ret = -ENOMEM; goto err; } id->route.num_paths = num_paths; return 0; err: cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); return ret; } EXPORT_SYMBOL(rdma_set_ib_paths); static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) { struct cma_work *work; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; queue_work(cma_wq, &work->work); return 0; } static int iboe_tos_to_sl(struct net_device *ndev, int tos) { /* get service level, SL, from IPv4 type of service, TOS */ int sl = (tos >> 5) & 0x7; /* final mappings are done by the vendor specific drivers */ return sl; } static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, unsigned long supported_gids, enum ib_gid_type default_gid) { if ((network_type == RDMA_NETWORK_IPV4 || network_type == RDMA_NETWORK_IPV6) && test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) return IB_GID_TYPE_ROCE_UDP_ENCAP; return default_gid; } static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) { struct rdma_route *route = &id_priv->id.route; struct rdma_addr *addr = &route->addr; struct cma_work *work; int ret; struct net_device *ndev = NULL; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); if (!route->path_rec) { ret = -ENOMEM; goto err1; } route->num_paths = 1; if (addr->dev_addr.bound_dev_if) { unsigned long supported_gids; ndev = dev_get_by_index(addr->dev_addr.net, addr->dev_addr.bound_dev_if); if (!ndev) { ret = -ENODEV; goto err2; } route->path_rec->net = ndev->if_vnet; route->path_rec->ifindex = ndev->if_index; supported_gids = roce_gid_type_mask_support(id_priv->id.device, id_priv->id.port_num); route->path_rec->gid_type = cma_route_gid_type(addr->dev_addr.network, supported_gids, id_priv->gid_type); } if (!ndev) { ret = -ENODEV; goto err2; } memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &route->path_rec->sgid); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, &route->path_rec->dgid); /* Use the hint from IP Stack to select GID Type */ if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network)) route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network); if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) /* TODO: get the hoplimit from the inet/inet6 device */ route->path_rec->hop_limit = addr->dev_addr.hoplimit; else route->path_rec->hop_limit = 1; route->path_rec->reversible = 1; route->path_rec->pkey = cpu_to_be16(0xffff); route->path_rec->mtu_selector = IB_SA_EQ; route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos); route->path_rec->traffic_class = id_priv->tos; route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu); route->path_rec->rate_selector = IB_SA_EQ; route->path_rec->rate = iboe_get_rate(ndev); dev_put(ndev); route->path_rec->packet_life_time_selector = IB_SA_EQ; route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; if (!route->path_rec->mtu) { ret = -EINVAL; goto err2; } work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; work->event.status = 0; queue_work(cma_wq, &work->work); return 0; err2: kfree(route->path_rec); route->path_rec = NULL; err1: kfree(work); return ret; } int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); if (rdma_cap_ib_sa(id->device, id->port_num)) ret = cma_resolve_ib_route(id_priv, timeout_ms); else if (rdma_protocol_roce(id->device, id->port_num)) ret = cma_resolve_iboe_route(id_priv); else if (rdma_protocol_iwarp(id->device, id->port_num)) ret = cma_resolve_iw_route(id_priv, timeout_ms); else ret = -ENOSYS; if (ret) goto err; return 0; err: cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); cma_deref_id(id_priv); return ret; } EXPORT_SYMBOL(rdma_resolve_route); static void cma_set_loopback(struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); break; case AF_INET6: ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 0, 0, 0, htonl(1)); break; default: ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 0, 0, 0, htonl(1)); break; } } static int cma_bind_loopback(struct rdma_id_private *id_priv) { struct cma_device *cma_dev, *cur_dev; struct ib_port_attr port_attr; union ib_gid gid; u16 pkey; int ret; u8 p; cma_dev = NULL; mutex_lock(&lock); list_for_each_entry(cur_dev, &dev_list, list) { if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cur_dev->device, 1)) continue; if (!cma_dev) cma_dev = cur_dev; for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { if (!ib_query_port(cur_dev->device, p, &port_attr) && port_attr.state == IB_PORT_ACTIVE) { cma_dev = cur_dev; goto port_found; } } } if (!cma_dev) { ret = -ENODEV; goto out; } p = 1; port_found: ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); if (ret) goto out; ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); if (ret) goto out; id_priv->id.route.addr.dev_addr.dev_type = (rdma_protocol_ib(cma_dev->device, p)) ? ARPHRD_INFINIBAND : ARPHRD_ETHER; rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); id_priv->id.port_num = p; cma_attach_to_dev(id_priv, cma_dev); cma_set_loopback(cma_src_addr(id_priv)); out: mutex_unlock(&lock); return ret; } static void addr_handler(int status, struct sockaddr *src_addr, struct rdma_dev_addr *dev_addr, void *context) { struct rdma_id_private *id_priv = context; struct rdma_cm_event event; memset(&event, 0, sizeof event); mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_RESOLVED)) goto out; memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); if (!status && !id_priv->cma_dev) status = cma_acquire_dev(id_priv, NULL); if (status) { if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ADDR_BOUND)) goto out; event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = status; } else event.event = RDMA_CM_EVENT_ADDR_RESOLVED; if (id_priv->id.event_handler(&id_priv->id, &event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); rdma_destroy_id(&id_priv->id); return; } out: mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); } static int cma_resolve_loopback(struct rdma_id_private *id_priv) { struct cma_work *work; union ib_gid gid; int ret; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; if (!id_priv->cma_dev) { ret = cma_bind_loopback(id_priv); if (ret) goto err; } rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ADDR_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; queue_work(cma_wq, &work->work); return 0; err: kfree(work); return ret; } static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) { struct cma_work *work; int ret; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; if (!id_priv->cma_dev) { ret = cma_resolve_ib_dev(id_priv); if (ret) goto err; } rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ADDR_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; queue_work(cma_wq, &work->work); return 0; err: kfree(work); return ret; } static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr) { if (!src_addr || !src_addr->sa_family) { src_addr = (struct sockaddr *) &id->route.addr.src_addr; src_addr->sa_family = dst_addr->sa_family; if (dst_addr->sa_family == AF_INET6) { struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr) || IN6_IS_ADDR_MC_INTFACELOCAL(&dst_addr6->sin6_addr)) id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; } else if (dst_addr->sa_family == AF_IB) { ((struct sockaddr_ib *) src_addr)->sib_pkey = ((struct sockaddr_ib *) dst_addr)->sib_pkey; } } return rdma_bind_addr(id, src_addr); } int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr, int timeout_ms) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); if (ret) return ret; } if (cma_family(id_priv) != dst_addr->sa_family) return -EINVAL; if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); } else { if (dst_addr->sa_family == AF_IB) { ret = cma_resolve_ib_addr(id_priv); } else { ret = cma_check_linklocal(&id->route.addr.dev_addr, dst_addr); if (ret) goto err; ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), dst_addr, &id->route.addr.dev_addr, timeout_ms, addr_handler, id_priv); } } if (ret) goto err; return 0; err: cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); cma_deref_id(id_priv); return ret; } EXPORT_SYMBOL(rdma_resolve_addr); int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) { struct rdma_id_private *id_priv; unsigned long flags; int ret; id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); if (reuse || id_priv->state == RDMA_CM_IDLE) { id_priv->reuseaddr = reuse; ret = 0; } else { ret = -EINVAL; } spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } EXPORT_SYMBOL(rdma_set_reuseaddr); int rdma_set_afonly(struct rdma_cm_id *id, int afonly) { struct rdma_id_private *id_priv; unsigned long flags; int ret; id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { id_priv->options |= (1 << CMA_OPTION_AFONLY); id_priv->afonly = afonly; ret = 0; } else { ret = -EINVAL; } spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } EXPORT_SYMBOL(rdma_set_afonly); static void cma_bind_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv) { struct sockaddr *addr; struct sockaddr_ib *sib; u64 sid, mask; __be16 port; addr = cma_src_addr(id_priv); port = htons(bind_list->port); switch (addr->sa_family) { case AF_INET: ((struct sockaddr_in *) addr)->sin_port = port; break; case AF_INET6: ((struct sockaddr_in6 *) addr)->sin6_port = port; break; case AF_IB: sib = (struct sockaddr_ib *) addr; sid = be64_to_cpu(sib->sib_sid); mask = be64_to_cpu(sib->sib_sid_mask); sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); sib->sib_sid_mask = cpu_to_be64(~0ULL); break; } id_priv->bind_list = bind_list; hlist_add_head(&id_priv->node, &bind_list->owners); } static int cma_alloc_port(enum rdma_port_space ps, struct rdma_id_private *id_priv, unsigned short snum) { struct rdma_bind_list *bind_list; int ret; bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); if (!bind_list) return -ENOMEM; ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, snum); if (ret < 0) goto err; bind_list->ps = ps; bind_list->port = (unsigned short)ret; cma_bind_port(bind_list, id_priv); return 0; err: kfree(bind_list); return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; } static int cma_alloc_any_port(enum rdma_port_space ps, struct rdma_id_private *id_priv) { static unsigned int last_used_port; int low, high, remaining; unsigned int rover; struct vnet *net = id_priv->id.route.addr.dev_addr.net; u32 rand; inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; get_random_bytes(&rand, sizeof(rand)); rover = rand % remaining + low; retry: if (last_used_port != rover && !cma_ps_find(net, ps, (unsigned short)rover)) { int ret = cma_alloc_port(ps, id_priv, rover); /* * Remember previously used port number in order to avoid * re-using same port immediately after it is closed. */ if (!ret) last_used_port = rover; if (ret != -EADDRNOTAVAIL) return ret; } if (--remaining) { rover++; if ((rover < low) || (rover > high)) rover = low; goto retry; } return -EADDRNOTAVAIL; } /* * Check that the requested port is available. This is called when trying to * bind to a specific port, or when trying to listen on a bound port. In * the latter case, the provided id_priv may already be on the bind_list, but * we still need to check that it's okay to start listening. */ static int cma_check_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv, uint8_t reuseaddr) { struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; addr = cma_src_addr(id_priv); hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) continue; if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && cur_id->reuseaddr) continue; cur_addr = cma_src_addr(cur_id); if (id_priv->afonly && cur_id->afonly && (addr->sa_family != cur_addr->sa_family)) continue; if (cma_any_addr(addr) || cma_any_addr(cur_addr)) return -EADDRNOTAVAIL; if (!cma_addr_cmp(addr, cur_addr)) return -EADDRINUSE; } return 0; } static int cma_use_port(enum rdma_port_space ps, struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list; unsigned short snum; int ret; snum = ntohs(cma_port(cma_src_addr(id_priv))); if (snum < IPPORT_RESERVED && priv_check(curthread, PRIV_NETINET_BINDANY) != 0) return -EACCES; bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); if (!bind_list) { ret = cma_alloc_port(ps, id_priv, snum); } else { ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); if (!ret) cma_bind_port(bind_list, id_priv); } return ret; } static int cma_bind_listen(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list = id_priv->bind_list; int ret = 0; mutex_lock(&lock); if (bind_list->owners.first->next) ret = cma_check_port(bind_list, id_priv, 0); mutex_unlock(&lock); return ret; } static enum rdma_port_space cma_select_inet_ps( struct rdma_id_private *id_priv) { switch (id_priv->id.ps) { case RDMA_PS_TCP: case RDMA_PS_UDP: case RDMA_PS_IPOIB: case RDMA_PS_IB: return id_priv->id.ps; default: return 0; } } static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) { enum rdma_port_space ps = 0; struct sockaddr_ib *sib; u64 sid_ps, mask, sid; sib = (struct sockaddr_ib *) cma_src_addr(id_priv); mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; sid = be64_to_cpu(sib->sib_sid) & mask; if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { sid_ps = RDMA_IB_IP_PS_IB; ps = RDMA_PS_IB; } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && (sid == (RDMA_IB_IP_PS_TCP & mask))) { sid_ps = RDMA_IB_IP_PS_TCP; ps = RDMA_PS_TCP; } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && (sid == (RDMA_IB_IP_PS_UDP & mask))) { sid_ps = RDMA_IB_IP_PS_UDP; ps = RDMA_PS_UDP; } if (ps) { sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | be64_to_cpu(sib->sib_sid_mask)); } return ps; } static int cma_get_port(struct rdma_id_private *id_priv) { enum rdma_port_space ps; int ret; if (cma_family(id_priv) != AF_IB) ps = cma_select_inet_ps(id_priv); else ps = cma_select_ib_ps(id_priv); if (!ps) return -EPROTONOSUPPORT; mutex_lock(&lock); if (cma_any_port(cma_src_addr(id_priv))) ret = cma_alloc_any_port(ps, id_priv); else ret = cma_use_port(ps, id_priv); mutex_unlock(&lock); return ret; } static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, struct sockaddr *addr) { #ifdef INET6 struct sockaddr_in6 sin6; if (addr->sa_family != AF_INET6) return 0; sin6 = *(struct sockaddr_in6 *)addr; if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr) || IN6_IS_ADDR_MC_INTFACELOCAL(&sin6.sin6_addr)) { bool failure; CURVNET_SET_QUIET(dev_addr->net); failure = sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0; CURVNET_RESTORE(); /* check if IPv6 scope ID is not set */ if (failure) return -EINVAL; dev_addr->bound_dev_if = sin6.sin6_scope_id; } #endif return 0; } int rdma_listen(struct rdma_cm_id *id, int backlog) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->state == RDMA_CM_IDLE) { id->route.addr.src_addr.ss_family = AF_INET; ret = rdma_bind_addr(id, cma_src_addr(id_priv)); if (ret) return ret; } if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) return -EINVAL; if (id_priv->reuseaddr) { ret = cma_bind_listen(id_priv); if (ret) goto err; } id_priv->backlog = backlog; if (id->device) { if (rdma_cap_ib_cm(id->device, 1)) { ret = cma_ib_listen(id_priv); if (ret) goto err; } else if (rdma_cap_iw_cm(id->device, 1)) { ret = cma_iw_listen(id_priv, backlog); if (ret) goto err; } else { ret = -ENOSYS; goto err; } } else cma_listen_on_all(id_priv); return 0; err: id_priv->backlog = 0; cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); return ret; } EXPORT_SYMBOL(rdma_listen); int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) { struct rdma_id_private *id_priv; int ret; if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && addr->sa_family != AF_IB) return -EAFNOSUPPORT; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) return -EINVAL; ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); if (ret) goto err1; memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); if (!cma_any_addr(addr)) { ret = cma_translate_addr(addr, &id->route.addr.dev_addr); if (ret) goto err1; ret = cma_acquire_dev(id_priv, NULL); if (ret) goto err1; } if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { if (addr->sa_family == AF_INET) id_priv->afonly = 1; #ifdef INET6 else if (addr->sa_family == AF_INET6) { CURVNET_SET_QUIET(id_priv->id.route.addr.dev_addr.net); id_priv->afonly = V_ip6_v6only; CURVNET_RESTORE(); } #endif } ret = cma_get_port(id_priv); if (ret) goto err2; return 0; err2: if (id_priv->cma_dev) cma_release_dev(id_priv); err1: cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; } EXPORT_SYMBOL(rdma_bind_addr); static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) { struct cma_hdr *cma_hdr; cma_hdr = hdr; cma_hdr->cma_version = CMA_VERSION; if (cma_family(id_priv) == AF_INET) { struct sockaddr_in *src4, *dst4; src4 = (struct sockaddr_in *) cma_src_addr(id_priv); dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); cma_set_ip_ver(cma_hdr, 4); cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; cma_hdr->port = src4->sin_port; } else if (cma_family(id_priv) == AF_INET6) { struct sockaddr_in6 *src6, *dst6; src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); cma_set_ip_ver(cma_hdr, 6); cma_hdr->src_addr.ip6 = src6->sin6_addr; cma_hdr->dst_addr.ip6 = dst6->sin6_addr; cma_hdr->port = src6->sin6_port; cma_ip6_clear_scope_id(&cma_hdr->src_addr.ip6); cma_ip6_clear_scope_id(&cma_hdr->dst_addr.ip6); } return 0; } static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; struct rdma_cm_event event; struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; mutex_lock(&id_priv->handler_mutex); if (id_priv->state != RDMA_CM_CONNECT) goto out; memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_SIDR_REQ_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -ETIMEDOUT; break; case IB_CM_SIDR_REP_RECEIVED: event.param.ud.private_data = ib_event->private_data; event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; if (rep->status != IB_SIDR_SUCCESS) { event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = ib_event->param.sidr_rep_rcvd.status; break; } ret = cma_set_qkey(id_priv, rep->qkey); if (ret) { event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = ret; break; } ret = ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, id_priv->id.route.path_rec, &event.param.ud.ah_attr); if (ret) { event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = ret; break; } event.param.ud.qp_num = rep->qpn; event.param.ud.qkey = rep->qkey; event.event = RDMA_CM_EVENT_ESTABLISHED; event.status = 0; break; default: pr_err("RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: mutex_unlock(&id_priv->handler_mutex); return ret; } static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_sidr_req_param req; struct ib_cm_id *id; void *private_data; int offset, ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); req.private_data_len = offset + conn_param->private_data_len; if (req.private_data_len < conn_param->private_data_len) return -EINVAL; if (req.private_data_len) { private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!private_data) return -ENOMEM; } else { private_data = NULL; } if (conn_param->private_data && conn_param->private_data_len) memcpy((char *)private_data + offset, conn_param->private_data, conn_param->private_data_len); if (private_data) { ret = cma_format_hdr(private_data, id_priv); if (ret) goto out; req.private_data = private_data; } id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, id_priv); if (IS_ERR(id)) { ret = PTR_ERR(id); goto out; } id_priv->cm_id.ib = id; req.path = id_priv->id.route.path_rec; req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); if (ret) { ib_destroy_cm_id(id_priv->cm_id.ib); id_priv->cm_id.ib = NULL; } out: kfree(private_data); return ret; } static int cma_connect_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_req_param req; struct rdma_route *route; void *private_data; struct ib_cm_id *id; int offset, ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); req.private_data_len = offset + conn_param->private_data_len; if (req.private_data_len < conn_param->private_data_len) return -EINVAL; if (req.private_data_len) { private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!private_data) return -ENOMEM; } else { private_data = NULL; } if (conn_param->private_data && conn_param->private_data_len) memcpy((char *)private_data + offset, conn_param->private_data, conn_param->private_data_len); id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); if (IS_ERR(id)) { ret = PTR_ERR(id); goto out; } id_priv->cm_id.ib = id; route = &id_priv->id.route; if (private_data) { ret = cma_format_hdr(private_data, id_priv); if (ret) goto out; req.private_data = private_data; } req.primary_path = &route->path_rec[0]; if (route->num_paths == 2) req.alternate_path = &route->path_rec[1]; req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.qp_num = id_priv->qp_num; req.qp_type = id_priv->id.qp_type; req.starting_psn = id_priv->seq_num; req.responder_resources = conn_param->responder_resources; req.initiator_depth = conn_param->initiator_depth; req.flow_control = conn_param->flow_control; req.retry_count = min_t(u8, 7, conn_param->retry_count); req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; req.max_cm_retries = CMA_MAX_CM_RETRIES; req.srq = id_priv->srq ? 1 : 0; ret = ib_send_cm_req(id_priv->cm_id.ib, &req); out: if (ret && !IS_ERR(id)) { ib_destroy_cm_id(id); id_priv->cm_id.ib = NULL; } kfree(private_data); return ret; } static int cma_connect_iw(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct iw_cm_id *cm_id; int ret; struct iw_cm_conn_param iw_param; cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id->tos = id_priv->tos; id_priv->cm_id.iw = cm_id; memcpy(&cm_id->local_addr, cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), rdma_addr_size(cma_dst_addr(id_priv))); ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) goto out; if (conn_param) { iw_param.ord = conn_param->initiator_depth; iw_param.ird = conn_param->responder_resources; iw_param.private_data = conn_param->private_data; iw_param.private_data_len = conn_param->private_data_len; iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; } else { memset(&iw_param, 0, sizeof iw_param); iw_param.qpn = id_priv->qp_num; } ret = iw_cm_connect(cm_id, &iw_param); out: if (ret) { iw_destroy_cm_id(cm_id); id_priv->cm_id.iw = NULL; } return ret; } int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) return -EINVAL; if (!id->qp) { id_priv->qp_num = conn_param->qp_num; id_priv->srq = conn_param->srq; } if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) ret = cma_resolve_ib_udp(id_priv, conn_param); else ret = cma_connect_ib(id_priv, conn_param); } else if (rdma_cap_iw_cm(id->device, id->port_num)) ret = cma_connect_iw(id_priv, conn_param); else ret = -ENOSYS; if (ret) goto err; return 0; err: cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); return ret; } EXPORT_SYMBOL(rdma_connect); static int cma_accept_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_rep_param rep; int ret; ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) goto out; ret = cma_modify_qp_rts(id_priv, conn_param); if (ret) goto out; memset(&rep, 0, sizeof rep); rep.qp_num = id_priv->qp_num; rep.starting_psn = id_priv->seq_num; rep.private_data = conn_param->private_data; rep.private_data_len = conn_param->private_data_len; rep.responder_resources = conn_param->responder_resources; rep.initiator_depth = conn_param->initiator_depth; rep.failover_accepted = 0; rep.flow_control = conn_param->flow_control; rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); rep.srq = id_priv->srq ? 1 : 0; ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); out: return ret; } static int cma_accept_iw(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct iw_cm_conn_param iw_param; int ret; ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) return ret; iw_param.ord = conn_param->initiator_depth; iw_param.ird = conn_param->responder_resources; iw_param.private_data = conn_param->private_data; iw_param.private_data_len = conn_param->private_data_len; if (id_priv->id.qp) { iw_param.qpn = id_priv->qp_num; } else iw_param.qpn = conn_param->qp_num; return iw_cm_accept(id_priv->cm_id.iw, &iw_param); } static int cma_send_sidr_rep(struct rdma_id_private *id_priv, enum ib_cm_sidr_status status, u32 qkey, const void *private_data, int private_data_len) { struct ib_cm_sidr_rep_param rep; int ret; memset(&rep, 0, sizeof rep); rep.status = status; if (status == IB_SIDR_SUCCESS) { ret = cma_set_qkey(id_priv, qkey); if (ret) return ret; rep.qp_num = id_priv->qp_num; rep.qkey = id_priv->qkey; } rep.private_data = private_data; rep.private_data_len = private_data_len; return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); } int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); id_priv->owner = task_pid_nr(current); if (!cma_comp(id_priv, RDMA_CM_CONNECT)) return -EINVAL; if (!id->qp && conn_param) { id_priv->qp_num = conn_param->qp_num; id_priv->srq = conn_param->srq; } if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) { if (conn_param) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, conn_param->qkey, conn_param->private_data, conn_param->private_data_len); else ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 0, NULL, 0); } else { if (conn_param) ret = cma_accept_ib(id_priv, conn_param); else ret = cma_rep_recv(id_priv); } } else if (rdma_cap_iw_cm(id->device, id->port_num)) ret = cma_accept_iw(id_priv, conn_param); else ret = -ENOSYS; if (ret) goto reject; return 0; reject: cma_modify_qp_err(id_priv); rdma_reject(id, NULL, 0); return ret; } EXPORT_SYMBOL(rdma_accept); int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!id_priv->cm_id.ib) return -EINVAL; switch (id->device->node_type) { case RDMA_NODE_IB_CA: ret = ib_cm_notify(id_priv->cm_id.ib, event); break; default: ret = 0; break; } return ret; } EXPORT_SYMBOL(rdma_notify); int rdma_reject(struct rdma_cm_id *id, const void *private_data, u8 private_data_len) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!id_priv->cm_id.ib) return -EINVAL; if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, private_data, private_data_len); else ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_reject(id_priv->cm_id.iw, private_data, private_data_len); } else ret = -ENOSYS; return ret; } EXPORT_SYMBOL(rdma_reject); int rdma_disconnect(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!id_priv->cm_id.ib) return -EINVAL; if (rdma_cap_ib_cm(id->device, id->port_num)) { ret = cma_modify_qp_err(id_priv); if (ret) goto out; /* Initiate or respond to a disconnect. */ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); } else ret = -EINVAL; out: return ret; } EXPORT_SYMBOL(rdma_disconnect); static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) { struct rdma_id_private *id_priv; struct cma_multicast *mc = multicast->context; struct rdma_cm_event event; int ret = 0; id_priv = mc->id_priv; mutex_lock(&id_priv->handler_mutex); if (id_priv->state != RDMA_CM_ADDR_BOUND && id_priv->state != RDMA_CM_ADDR_RESOLVED) goto out; if (!status) status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); mutex_lock(&id_priv->qp_mutex); if (!status && id_priv->id.qp) status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, be16_to_cpu(multicast->rec.mlid)); mutex_unlock(&id_priv->qp_mutex); memset(&event, 0, sizeof event); event.status = status; event.param.ud.private_data = mc->context; if (!status) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct net_device *ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); enum ib_gid_type gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; event.event = RDMA_CM_EVENT_MULTICAST_JOIN; ret = ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num, &multicast->rec, ndev, gid_type, &event.param.ud.ah_attr); if (ret) event.event = RDMA_CM_EVENT_MULTICAST_ERROR; event.param.ud.qp_num = 0xFFFFFF; event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); if (ndev) dev_put(ndev); } else event.event = RDMA_CM_EVENT_MULTICAST_ERROR; ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return 0; } out: mutex_unlock(&id_priv->handler_mutex); return 0; } static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr, union ib_gid *mgid) { unsigned char mc_map[MAX_ADDR_LEN]; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct sockaddr_in *sin = (struct sockaddr_in *) addr; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; if (cma_any_addr(addr)) { memset(mgid, 0, sizeof *mgid); } else if ((addr->sa_family == AF_INET6) && ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 0xFF10A01B)) { /* IPv6 address is an SA assigned MGID. */ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else if (addr->sa_family == AF_IB) { memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); } else if (addr->sa_family == AF_INET6) { ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ *mgid = *(union ib_gid *) (mc_map + 4); } else { ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ *mgid = *(union ib_gid *) (mc_map + 4); } } static void cma_query_sa_classport_info_cb(int status, struct ib_class_port_info *rec, void *context) { struct class_port_info_context *cb_ctx = context; WARN_ON(!context); if (status || !rec) { pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", cb_ctx->device->name, cb_ctx->port_num, status); goto out; } memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); out: complete(&cb_ctx->done); } static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, struct ib_class_port_info *class_port_info) { struct class_port_info_context *cb_ctx; int ret; cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); if (!cb_ctx) return -ENOMEM; cb_ctx->device = device; cb_ctx->class_port_info = class_port_info; cb_ctx->port_num = port_num; init_completion(&cb_ctx->done); ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num, CMA_QUERY_CLASSPORT_INFO_TIMEOUT, GFP_KERNEL, cma_query_sa_classport_info_cb, cb_ctx, &cb_ctx->sa_query); if (ret < 0) { pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", device->name, port_num, ret); goto out; } wait_for_completion(&cb_ctx->done); out: kfree(cb_ctx); return ret; } static int cma_join_ib_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct ib_sa_mcmember_rec rec; struct ib_class_port_info class_port_info; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; ib_sa_comp_mask comp_mask; int ret; ib_addr_get_mgid(dev_addr, &rec.mgid); ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, &rec.mgid, &rec); if (ret) return ret; ret = cma_set_qkey(id_priv, 0); if (ret) return ret; cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); rec.qkey = cpu_to_be32(id_priv->qkey); rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = mc->join_state; if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { ret = cma_query_sa_classport_info(id_priv->id.device, id_priv->id.port_num, &class_port_info); if (ret) return ret; if (!(ib_get_cpi_capmask2(&class_port_info) & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { pr_warn("RDMA CM: %s port %u Unable to multicast join\n" "RDMA CM: SM doesn't support Send Only Full Member option\n", id_priv->id.device->name, id_priv->id.port_num); return -EOPNOTSUPP; } } comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | IB_SA_MCMEMBER_REC_FLOW_LABEL | IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; if (id_priv->id.ps == RDMA_PS_IPOIB) comp_mask |= IB_SA_MCMEMBER_REC_RATE | IB_SA_MCMEMBER_REC_RATE_SELECTOR | IB_SA_MCMEMBER_REC_MTU_SELECTOR | IB_SA_MCMEMBER_REC_MTU | IB_SA_MCMEMBER_REC_HOP_LIMIT; mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, id_priv->id.port_num, &rec, comp_mask, GFP_KERNEL, cma_ib_mc_handler, mc); return PTR_ERR_OR_ZERO(mc->multicast.ib); } static void iboe_mcast_work_handler(struct work_struct *work) { struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); struct cma_multicast *mc = mw->mc; struct ib_sa_multicast *m = mc->multicast.ib; mc->multicast.ib->context = mc; cma_ib_mc_handler(0, m); kref_put(&mc->mcref, release_mc); kfree(mw); } static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, enum ib_gid_type gid_type) { struct sockaddr_in *sin = (struct sockaddr_in *)addr; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; if (cma_any_addr(addr)) { memset(mgid, 0, sizeof *mgid); } else if (addr->sa_family == AF_INET6) { memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else { mgid->raw[0] = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff; mgid->raw[1] = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e; mgid->raw[2] = 0; mgid->raw[3] = 0; mgid->raw[4] = 0; mgid->raw[5] = 0; mgid->raw[6] = 0; mgid->raw[7] = 0; mgid->raw[8] = 0; mgid->raw[9] = 0; mgid->raw[10] = 0xff; mgid->raw[11] = 0xff; *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; } } static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct iboe_mcast_work *work; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int err = 0; struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; enum ib_gid_type gid_type; bool send_only; send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); if (cma_zero_addr((struct sockaddr *)&mc->addr)) return -EINVAL; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); if (!mc->multicast.ib) { err = -ENOMEM; goto out1; } gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type); mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); if (id_priv->id.ps == RDMA_PS_UDP) mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!ndev) { err = -ENODEV; goto out2; } mc->multicast.ib->rec.rate = iboe_get_rate(ndev); mc->multicast.ib->rec.hop_limit = 1; mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu); if (addr->sa_family == AF_INET || addr->sa_family == AF_INET6) { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; if (!send_only) { err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, true); if (!err) mc->igmp_joined = true; } } } else { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) err = -ENOTSUPP; } dev_put(ndev); if (err || !mc->multicast.ib->rec.mtu) { if (!err) err = -EINVAL; goto out2; } rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &mc->multicast.ib->rec.port_gid); work->id = id_priv; work->mc = mc; INIT_WORK(&work->work, iboe_mcast_work_handler); kref_get(&mc->mcref); queue_work(cma_wq, &work->work); return 0; out2: kfree(mc->multicast.ib); out1: kfree(work); return err; } int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, u8 join_state, void *context) { struct rdma_id_private *id_priv; struct cma_multicast *mc; int ret; if (!id->device) return -EINVAL; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) return -EINVAL; mc = kmalloc(sizeof *mc, GFP_KERNEL); if (!mc) return -ENOMEM; memcpy(&mc->addr, addr, rdma_addr_size(addr)); mc->context = context; mc->id_priv = id_priv; mc->igmp_joined = false; mc->join_state = join_state; spin_lock(&id_priv->lock); list_add(&mc->list, &id_priv->mc_list); spin_unlock(&id_priv->lock); if (rdma_protocol_roce(id->device, id->port_num)) { kref_init(&mc->mcref); ret = cma_iboe_join_multicast(id_priv, mc); } else if (rdma_cap_ib_mcast(id->device, id->port_num)) ret = cma_join_ib_multicast(id_priv, mc); else ret = -ENOSYS; if (ret) { spin_lock_irq(&id_priv->lock); list_del(&mc->list); spin_unlock_irq(&id_priv->lock); kfree(mc); } return ret; } EXPORT_SYMBOL(rdma_join_multicast); void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) { struct rdma_id_private *id_priv; struct cma_multicast *mc; id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irq(&id_priv->lock); list_for_each_entry(mc, &id_priv->mc_list, list) { if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) { list_del(&mc->list); spin_unlock_irq(&id_priv->lock); if (id->qp) ib_detach_mcast(id->qp, &mc->multicast.ib->rec.mgid, be16_to_cpu(mc->multicast.ib->rec.mlid)); BUG_ON(id_priv->cma_dev->device != id->device); if (rdma_cap_ib_mcast(id->device, id->port_num)) { ib_sa_free_multicast(mc->multicast.ib); kfree(mc); } else if (rdma_protocol_roce(id->device, id->port_num)) { if (mc->igmp_joined) { struct rdma_dev_addr *dev_addr = &id->route.addr.dev_addr; struct net_device *ndev = NULL; if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (ndev) { cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false); dev_put(ndev); } mc->igmp_joined = false; } kref_put(&mc->mcref, release_mc); } return; } } spin_unlock_irq(&id_priv->lock); } EXPORT_SYMBOL(rdma_leave_multicast); static int sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS) { struct cma_device *cma_dev = arg1; const int port = arg2; char buf[64]; int error; strlcpy(buf, ib_cache_gid_type_str( cma_get_default_gid_type(cma_dev, port)), sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) goto done; error = ib_cache_gid_parse_type_str(buf); if (error < 0) { error = EINVAL; goto done; } cma_set_default_gid_type(cma_dev, port, error); error = 0; done: return (error); } static void cma_add_one(struct ib_device *device) { struct cma_device *cma_dev; struct rdma_id_private *id_priv; unsigned int i; cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); if (!cma_dev) return; sysctl_ctx_init(&cma_dev->sysctl_ctx); cma_dev->device = device; cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, sizeof(*cma_dev->default_gid_type), GFP_KERNEL); if (!cma_dev->default_gid_type) { kfree(cma_dev); return; } for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { unsigned long supported_gids; unsigned int default_gid_type; supported_gids = roce_gid_type_mask_support(device, i); if (WARN_ON(!supported_gids)) { /* set something valid */ default_gid_type = 0; } else if (test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) { /* prefer RoCEv2, if supported */ default_gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP; } else { default_gid_type = find_first_bit(&supported_gids, BITS_PER_LONG); } cma_dev->default_gid_type[i - rdma_start_port(device)] = default_gid_type; } init_completion(&cma_dev->comp); atomic_set(&cma_dev->refcount, 1); INIT_LIST_HEAD(&cma_dev->id_list); ib_set_client_data(device, &cma_client, cma_dev); mutex_lock(&lock); list_add_tail(&cma_dev->list, &dev_list); list_for_each_entry(id_priv, &listen_any_list, list) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { char buf[64]; snprintf(buf, sizeof(buf), "default_roce_mode_port%d", i); (void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx, SYSCTL_CHILDREN(device->ports_parent->parent->oidp), OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, cma_dev, i, &sysctl_cma_default_roce_mode, "A", "Default RoCE mode. Valid values: IB/RoCE v1 and RoCE v2"); } } static int cma_remove_id_dev(struct rdma_id_private *id_priv) { struct rdma_cm_event event; enum rdma_cm_state state; int ret = 0; /* Record that we want to remove the device */ state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); if (state == RDMA_CM_DESTROYING) return 0; cma_cancel_operation(id_priv, state); mutex_lock(&id_priv->handler_mutex); /* Check for destruction from another callback. */ if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) goto out; memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; ret = id_priv->id.event_handler(&id_priv->id, &event); out: mutex_unlock(&id_priv->handler_mutex); return ret; } static void cma_process_remove(struct cma_device *cma_dev) { struct rdma_id_private *id_priv; int ret; mutex_lock(&lock); while (!list_empty(&cma_dev->id_list)) { id_priv = list_entry(cma_dev->id_list.next, struct rdma_id_private, list); list_del(&id_priv->listen_list); list_del_init(&id_priv->list); atomic_inc(&id_priv->refcount); mutex_unlock(&lock); ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv); cma_deref_id(id_priv); if (ret) rdma_destroy_id(&id_priv->id); mutex_lock(&lock); } mutex_unlock(&lock); cma_deref_dev(cma_dev); wait_for_completion(&cma_dev->comp); } static void cma_remove_one(struct ib_device *device, void *client_data) { struct cma_device *cma_dev = client_data; if (!cma_dev) return; mutex_lock(&lock); list_del(&cma_dev->list); mutex_unlock(&lock); cma_process_remove(cma_dev); sysctl_ctx_free(&cma_dev->sysctl_ctx); kfree(cma_dev->default_gid_type); kfree(cma_dev); } static void cma_init_vnet(void *arg) { struct cma_pernet *pernet = &VNET(cma_pernet); idr_init(&pernet->tcp_ps); idr_init(&pernet->udp_ps); idr_init(&pernet->ipoib_ps); idr_init(&pernet->ib_ps); } VNET_SYSINIT(cma_init_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_FIRST, cma_init_vnet, NULL); static void cma_destroy_vnet(void *arg) { struct cma_pernet *pernet = &VNET(cma_pernet); idr_destroy(&pernet->tcp_ps); idr_destroy(&pernet->udp_ps); idr_destroy(&pernet->ipoib_ps); idr_destroy(&pernet->ib_ps); } VNET_SYSUNINIT(cma_destroy_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_SECOND, cma_destroy_vnet, NULL); static int __init cma_init(void) { int ret; cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); if (!cma_wq) return -ENOMEM; ib_sa_register_client(&sa_client); rdma_addr_register_client(&addr_client); ret = ib_register_client(&cma_client); if (ret) goto err; cma_configfs_init(); return 0; err: rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); return ret; } static void __exit cma_cleanup(void) { cma_configfs_exit(); ib_unregister_client(&cma_client); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); } module_init(cma_init); module_exit(cma_cleanup); Index: head/sys/ofed/drivers/infiniband/core/ib_uverbs_marshall.c =================================================================== --- head/sys/ofed/drivers/infiniband/core/ib_uverbs_marshall.c (revision 338540) +++ head/sys/ofed/drivers/infiniband/core/ib_uverbs_marshall.c (revision 338541) @@ -1,153 +1,153 @@ /*- * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 * * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, struct ib_ah_attr *src) { memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid); dst->grh.flow_label = src->grh.flow_label; dst->grh.sgid_index = src->grh.sgid_index; dst->grh.hop_limit = src->grh.hop_limit; dst->grh.traffic_class = src->grh.traffic_class; memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); dst->dlid = src->dlid; dst->sl = src->sl; dst->src_path_bits = src->src_path_bits; dst->static_rate = src->static_rate; dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0; dst->port_num = src->port_num; dst->reserved = 0; } EXPORT_SYMBOL(ib_copy_ah_attr_to_user); void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, struct ib_qp_attr *src) { dst->qp_state = src->qp_state; dst->cur_qp_state = src->cur_qp_state; dst->path_mtu = src->path_mtu; dst->path_mig_state = src->path_mig_state; dst->qkey = src->qkey; dst->rq_psn = src->rq_psn; dst->sq_psn = src->sq_psn; dst->dest_qp_num = src->dest_qp_num; dst->qp_access_flags = src->qp_access_flags; dst->max_send_wr = src->cap.max_send_wr; dst->max_recv_wr = src->cap.max_recv_wr; dst->max_send_sge = src->cap.max_send_sge; dst->max_recv_sge = src->cap.max_recv_sge; dst->max_inline_data = src->cap.max_inline_data; ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr); dst->pkey_index = src->pkey_index; dst->alt_pkey_index = src->alt_pkey_index; dst->en_sqd_async_notify = src->en_sqd_async_notify; dst->sq_draining = src->sq_draining; dst->max_rd_atomic = src->max_rd_atomic; dst->max_dest_rd_atomic = src->max_dest_rd_atomic; dst->min_rnr_timer = src->min_rnr_timer; dst->port_num = src->port_num; dst->timeout = src->timeout; dst->retry_cnt = src->retry_cnt; dst->rnr_retry = src->rnr_retry; dst->alt_port_num = src->alt_port_num; dst->alt_timeout = src->alt_timeout; memset(dst->reserved, 0, sizeof(dst->reserved)); } EXPORT_SYMBOL(ib_copy_qp_attr_to_user); void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, struct ib_sa_path_rec *src) { memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid); memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid); dst->dlid = src->dlid; dst->slid = src->slid; dst->raw_traffic = src->raw_traffic; dst->flow_label = src->flow_label; dst->hop_limit = src->hop_limit; dst->traffic_class = src->traffic_class; dst->reversible = src->reversible; dst->numb_path = src->numb_path; dst->pkey = src->pkey; dst->sl = src->sl; dst->mtu_selector = src->mtu_selector; dst->mtu = src->mtu; dst->rate_selector = src->rate_selector; dst->rate = src->rate; dst->packet_life_time = src->packet_life_time; dst->preference = src->preference; dst->packet_life_time_selector = src->packet_life_time_selector; } EXPORT_SYMBOL(ib_copy_path_rec_to_user); void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, struct ib_user_path_rec *src) { memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid); memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid); dst->dlid = src->dlid; dst->slid = src->slid; dst->raw_traffic = src->raw_traffic; dst->flow_label = src->flow_label; dst->hop_limit = src->hop_limit; dst->traffic_class = src->traffic_class; dst->reversible = src->reversible; dst->numb_path = src->numb_path; dst->pkey = src->pkey; dst->sl = src->sl; dst->mtu_selector = src->mtu_selector; dst->mtu = src->mtu; dst->rate_selector = src->rate_selector; dst->rate = src->rate; dst->packet_life_time = src->packet_life_time; dst->preference = src->preference; dst->packet_life_time_selector = src->packet_life_time_selector; memset(dst->dmac, 0, sizeof(dst->dmac)); - dst->net = NULL; + dst->net = TD_TO_VNET(curthread); dst->ifindex = 0; dst->gid_type = IB_GID_TYPE_IB; } EXPORT_SYMBOL(ib_copy_path_rec_from_user); Index: head/sys/ofed/include/rdma/ib_addr.h =================================================================== --- head/sys/ofed/include/rdma/ib_addr.h (revision 338540) +++ head/sys/ofed/include/rdma/ib_addr.h (revision 338541) @@ -1,358 +1,362 @@ /*- * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 * * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * $FreeBSD$ */ #if !defined(IB_ADDR_H) #define IB_ADDR_H #include #include #include #include #include #include #include #include #include #include #include #include #include struct rdma_addr_client { atomic_t refcount; struct completion comp; }; /** * rdma_addr_register_client - Register an address client. */ void rdma_addr_register_client(struct rdma_addr_client *client); /** * rdma_addr_unregister_client - Deregister an address client. * @client: Client object to deregister. */ void rdma_addr_unregister_client(struct rdma_addr_client *client); /** * struct rdma_dev_addr - Contains resolved RDMA hardware addresses * @src_dev_addr: Source MAC address. * @dst_dev_addr: Destination MAC address. * @broadcast: Broadcast address of the device. * @dev_type: The interface hardware type of the device. * @bound_dev_if: An optional device interface index. * @transport: The transport type used. * @net: Network namespace containing the bound_dev_if net_dev. */ struct vnet; struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; unsigned short dev_type; int bound_dev_if; enum rdma_transport_type transport; struct vnet *net; enum rdma_network_type network; int hoplimit; }; /** * rdma_translate_ip - Translate a local IP address to an RDMA hardware * address. * * The dev_addr->net and dev_addr->bound_dev_if fields must be initialized. */ int rdma_translate_ip(const struct sockaddr *addr, struct rdma_dev_addr *dev_addr); /** * rdma_resolve_ip - Resolve source and destination IP addresses to * RDMA hardware addresses. * @client: Address client associated with request. * @src_addr: An optional source address to use in the resolution. If a * source address is not provided, a usable address will be returned via * the callback. * @dst_addr: The destination address to resolve. * @addr: A reference to a data location that will receive the resolved * addresses. The data location must remain valid until the callback has * been invoked. The net field of the addr struct must be valid. * @timeout_ms: Amount of time to wait for the address resolution to complete. * @callback: Call invoked once address resolution has completed, timed out, * or been canceled. A status of 0 indicates success. * @context: User-specified context associated with the call. */ int rdma_resolve_ip(struct rdma_addr_client *client, struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), void *context); int rdma_resolve_ip_route(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr); void rdma_addr_cancel(struct rdma_dev_addr *addr); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr); int rdma_addr_size(struct sockaddr *addr); int rdma_addr_size_in6(struct sockaddr_in6 *addr); int rdma_addr_size_kss(struct sockaddr_storage *addr); int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, u8 *smac, struct net_device *dev, int *hoplimit); static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9]; } static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey) { dev_addr->broadcast[8] = pkey >> 8; dev_addr->broadcast[9] = (unsigned char) pkey; } static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); } static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) { return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev) { uint16_t tag; if (dev->if_type == IFT_ETHER && dev->if_pcp != IFNET_PCP_NONE) return 0x0000; /* prio-tagged traffic */ if (VLAN_TAG(__DECONST(struct ifnet *, dev), &tag) != 0) return 0xffff; return tag; } static inline int rdma_ip2gid(const struct sockaddr *addr, union ib_gid *gid) { switch (addr->sa_family) { case AF_INET: ipv6_addr_set_v4mapped(((const struct sockaddr_in *) addr)->sin_addr.s_addr, (struct in6_addr *)gid); break; case AF_INET6: memcpy(gid->raw, &((const struct sockaddr_in6 *)addr)->sin6_addr, 16); /* make sure scope ID gets zeroed inside GID */ if (IN6_IS_SCOPE_LINKLOCAL((struct in6_addr *)gid->raw) || IN6_IS_ADDR_MC_INTFACELOCAL((struct in6_addr *)gid->raw)) { gid->raw[2] = 0; gid->raw[3] = 0; } break; default: return -EINVAL; } return 0; } /* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */ static inline void rdma_gid2ip(struct sockaddr *out, const union ib_gid *gid) { if (ipv6_addr_v4mapped((const struct in6_addr *)gid)) { struct sockaddr_in *out_in = (struct sockaddr_in *)out; memset(out_in, 0, sizeof(*out_in)); out_in->sin_len = sizeof(*out_in); out_in->sin_family = AF_INET; memcpy(&out_in->sin_addr.s_addr, gid->raw + 12, 4); } else { struct sockaddr_in6 *out_in = (struct sockaddr_in6 *)out; memset(out_in, 0, sizeof(*out_in)); out_in->sin6_len = sizeof(*out_in); out_in->sin6_family = AF_INET6; memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16); } } static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { struct net_device *dev; struct ifaddr *ifa; - dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); +#ifdef VIMAGE + if (dev_addr->net == NULL) + return; +#endif + dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (dev) { CK_STAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) { if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != AF_INET) continue; ipv6_addr_set_v4mapped(((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr, (struct in6_addr *)gid); break; } dev_put(dev); } } static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { if (dev_addr->transport == RDMA_TRANSPORT_IB && dev_addr->dev_type != ARPHRD_INFINIBAND) iboe_addr_get_sgid(dev_addr, gid); else memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } static inline enum ib_mtu iboe_get_mtu(int mtu) { /* * reduce IB headers from effective IBoE MTU. 28 stands for * atomic header which is the biggest possible header after BTH */ mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28; if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) return IB_MTU_4096; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_2048)) return IB_MTU_2048; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_1024)) return IB_MTU_1024; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_512)) return IB_MTU_512; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_256)) return IB_MTU_256; else return 0; } static inline int iboe_get_rate(struct net_device *dev) { uint64_t baudrate = dev->if_baudrate; #ifdef if_baudrate_pf int exp; for (exp = dev->if_baudrate_pf; exp > 0; exp--) baudrate *= 10; #endif if (baudrate >= IF_Gbps(40)) return IB_RATE_40_GBPS; else if (baudrate >= IF_Gbps(30)) return IB_RATE_30_GBPS; else if (baudrate >= IF_Gbps(20)) return IB_RATE_20_GBPS; else if (baudrate >= IF_Gbps(10)) return IB_RATE_10_GBPS; else return IB_RATE_PORT_CURRENT; } static inline int rdma_link_local_addr(struct in6_addr *addr) { if (addr->s6_addr32[0] == htonl(0xfe800000) && addr->s6_addr32[1] == 0) return 1; return 0; } static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) { memcpy(mac, &addr->s6_addr[8], 3); memcpy(mac + 3, &addr->s6_addr[13], 3); mac[0] ^= 2; } static inline int rdma_is_multicast_addr(struct in6_addr *addr) { __be32 ipv4_addr; if (addr->s6_addr[0] == 0xff) return 1; ipv4_addr = addr->s6_addr32[3]; return (ipv6_addr_v4mapped(addr) && ipv4_is_multicast(ipv4_addr)); } static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) { int i; mac[0] = 0x33; mac[1] = 0x33; for (i = 2; i < 6; ++i) mac[i] = addr->s6_addr[i + 10]; } static inline u16 rdma_get_vlan_id(union ib_gid *dgid) { u16 vid; vid = dgid->raw[11] << 8 | dgid->raw[12]; return vid < 0x1000 ? vid : 0xffff; } static inline struct net_device *rdma_vlan_dev_real_dev(struct net_device *dev) { if (dev->if_type == IFT_ETHER && dev->if_pcp != IFNET_PCP_NONE) return dev; /* prio-tagged traffic */ return VLAN_TRUNKDEV(__DECONST(struct ifnet *, dev)); } #endif /* IB_ADDR_H */ Index: head/sys/ofed/include/rdma/ib_cm.h =================================================================== --- head/sys/ofed/include/rdma/ib_cm.h (revision 338540) +++ head/sys/ofed/include/rdma/ib_cm.h (revision 338541) @@ -1,611 +1,625 @@ /*- * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 * * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * $FreeBSD$ */ #if !defined(IB_CM_H) #define IB_CM_H #include #include /* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */ extern struct class cm_class; enum ib_cm_state { IB_CM_IDLE, IB_CM_LISTEN, IB_CM_REQ_SENT, IB_CM_REQ_RCVD, IB_CM_MRA_REQ_SENT, IB_CM_MRA_REQ_RCVD, IB_CM_REP_SENT, IB_CM_REP_RCVD, IB_CM_MRA_REP_SENT, IB_CM_MRA_REP_RCVD, IB_CM_ESTABLISHED, IB_CM_DREQ_SENT, IB_CM_DREQ_RCVD, IB_CM_TIMEWAIT, IB_CM_SIDR_REQ_SENT, IB_CM_SIDR_REQ_RCVD }; enum ib_cm_lap_state { IB_CM_LAP_UNINIT, IB_CM_LAP_IDLE, IB_CM_LAP_SENT, IB_CM_LAP_RCVD, IB_CM_MRA_LAP_SENT, IB_CM_MRA_LAP_RCVD, }; enum ib_cm_event_type { IB_CM_REQ_ERROR, IB_CM_REQ_RECEIVED, IB_CM_REP_ERROR, IB_CM_REP_RECEIVED, IB_CM_RTU_RECEIVED, IB_CM_USER_ESTABLISHED, IB_CM_DREQ_ERROR, IB_CM_DREQ_RECEIVED, IB_CM_DREP_RECEIVED, IB_CM_TIMEWAIT_EXIT, IB_CM_MRA_RECEIVED, IB_CM_REJ_RECEIVED, IB_CM_LAP_ERROR, IB_CM_LAP_RECEIVED, IB_CM_APR_RECEIVED, IB_CM_SIDR_REQ_ERROR, IB_CM_SIDR_REQ_RECEIVED, IB_CM_SIDR_REP_RECEIVED }; enum ib_cm_data_size { IB_CM_REQ_PRIVATE_DATA_SIZE = 92, IB_CM_MRA_PRIVATE_DATA_SIZE = 222, IB_CM_REJ_PRIVATE_DATA_SIZE = 148, IB_CM_REP_PRIVATE_DATA_SIZE = 196, IB_CM_RTU_PRIVATE_DATA_SIZE = 224, IB_CM_DREQ_PRIVATE_DATA_SIZE = 220, IB_CM_DREP_PRIVATE_DATA_SIZE = 224, IB_CM_REJ_ARI_LENGTH = 72, IB_CM_LAP_PRIVATE_DATA_SIZE = 168, IB_CM_APR_PRIVATE_DATA_SIZE = 148, IB_CM_APR_INFO_LENGTH = 72, IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216, IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136, IB_CM_SIDR_REP_INFO_LENGTH = 72, }; struct ib_cm_id; struct ib_cm_req_event_param { struct ib_cm_id *listen_id; /* P_Key that was used by the GMP's BTH header */ u16 bth_pkey; u8 port; struct ib_sa_path_rec *primary_path; struct ib_sa_path_rec *alternate_path; + /* + * SGID index of the primary path. Currently only + * useful for RoCE. Alternate path GID attributes + * are not yet supported. + */ + u8 ppath_sgid_index; + __be64 remote_ca_guid; u32 remote_qkey; u32 remote_qpn; enum ib_qp_type qp_type; u32 starting_psn; u8 responder_resources; u8 initiator_depth; unsigned int local_cm_response_timeout:5; unsigned int flow_control:1; unsigned int remote_cm_response_timeout:5; unsigned int retry_count:3; unsigned int rnr_retry_count:3; unsigned int srq:1; }; struct ib_cm_rep_event_param { __be64 remote_ca_guid; u32 remote_qkey; u32 remote_qpn; u32 starting_psn; u8 responder_resources; u8 initiator_depth; unsigned int target_ack_delay:5; unsigned int failover_accepted:2; unsigned int flow_control:1; unsigned int rnr_retry_count:3; unsigned int srq:1; }; enum ib_cm_rej_reason { IB_CM_REJ_NO_QP = 1, IB_CM_REJ_NO_EEC = 2, IB_CM_REJ_NO_RESOURCES = 3, IB_CM_REJ_TIMEOUT = 4, IB_CM_REJ_UNSUPPORTED = 5, IB_CM_REJ_INVALID_COMM_ID = 6, IB_CM_REJ_INVALID_COMM_INSTANCE = 7, IB_CM_REJ_INVALID_SERVICE_ID = 8, IB_CM_REJ_INVALID_TRANSPORT_TYPE = 9, IB_CM_REJ_STALE_CONN = 10, IB_CM_REJ_RDC_NOT_EXIST = 11, IB_CM_REJ_INVALID_GID = 12, IB_CM_REJ_INVALID_LID = 13, IB_CM_REJ_INVALID_SL = 14, IB_CM_REJ_INVALID_TRAFFIC_CLASS = 15, IB_CM_REJ_INVALID_HOP_LIMIT = 16, IB_CM_REJ_INVALID_PACKET_RATE = 17, IB_CM_REJ_INVALID_ALT_GID = 18, IB_CM_REJ_INVALID_ALT_LID = 19, IB_CM_REJ_INVALID_ALT_SL = 20, IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS = 21, IB_CM_REJ_INVALID_ALT_HOP_LIMIT = 22, IB_CM_REJ_INVALID_ALT_PACKET_RATE = 23, IB_CM_REJ_PORT_CM_REDIRECT = 24, IB_CM_REJ_PORT_REDIRECT = 25, IB_CM_REJ_INVALID_MTU = 26, IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES = 27, IB_CM_REJ_CONSUMER_DEFINED = 28, IB_CM_REJ_INVALID_RNR_RETRY = 29, IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID = 30, IB_CM_REJ_INVALID_CLASS_VERSION = 31, IB_CM_REJ_INVALID_FLOW_LABEL = 32, IB_CM_REJ_INVALID_ALT_FLOW_LABEL = 33 }; struct ib_cm_rej_event_param { enum ib_cm_rej_reason reason; void *ari; u8 ari_length; }; struct ib_cm_mra_event_param { u8 service_timeout; }; struct ib_cm_lap_event_param { struct ib_sa_path_rec *alternate_path; }; enum ib_cm_apr_status { IB_CM_APR_SUCCESS, IB_CM_APR_INVALID_COMM_ID, IB_CM_APR_UNSUPPORTED, IB_CM_APR_REJECT, IB_CM_APR_REDIRECT, IB_CM_APR_IS_CURRENT, IB_CM_APR_INVALID_QPN_EECN, IB_CM_APR_INVALID_LID, IB_CM_APR_INVALID_GID, IB_CM_APR_INVALID_FLOW_LABEL, IB_CM_APR_INVALID_TCLASS, IB_CM_APR_INVALID_HOP_LIMIT, IB_CM_APR_INVALID_PACKET_RATE, IB_CM_APR_INVALID_SL }; struct ib_cm_apr_event_param { enum ib_cm_apr_status ap_status; void *apr_info; u8 info_len; }; struct ib_cm_sidr_req_event_param { struct ib_cm_id *listen_id; __be64 service_id; + + /* + * SGID index of the request. Currently only + * useful for RoCE. + */ + u8 sgid_index; + /* P_Key that was used by the GMP's BTH header */ u16 bth_pkey; u8 port; u16 pkey; }; enum ib_cm_sidr_status { IB_SIDR_SUCCESS, IB_SIDR_UNSUPPORTED, IB_SIDR_REJECT, IB_SIDR_NO_QP, IB_SIDR_REDIRECT, IB_SIDR_UNSUPPORTED_VERSION }; struct ib_cm_sidr_rep_event_param { enum ib_cm_sidr_status status; u32 qkey; u32 qpn; void *info; u8 info_len; }; struct ib_cm_event { enum ib_cm_event_type event; union { struct ib_cm_req_event_param req_rcvd; struct ib_cm_rep_event_param rep_rcvd; /* No data for RTU received events. */ struct ib_cm_rej_event_param rej_rcvd; struct ib_cm_mra_event_param mra_rcvd; struct ib_cm_lap_event_param lap_rcvd; struct ib_cm_apr_event_param apr_rcvd; /* No data for DREQ/DREP received events. */ struct ib_cm_sidr_req_event_param sidr_req_rcvd; struct ib_cm_sidr_rep_event_param sidr_rep_rcvd; enum ib_wc_status send_status; } param; void *private_data; }; #define CM_REQ_ATTR_ID cpu_to_be16(0x0010) #define CM_MRA_ATTR_ID cpu_to_be16(0x0011) #define CM_REJ_ATTR_ID cpu_to_be16(0x0012) #define CM_REP_ATTR_ID cpu_to_be16(0x0013) #define CM_RTU_ATTR_ID cpu_to_be16(0x0014) #define CM_DREQ_ATTR_ID cpu_to_be16(0x0015) #define CM_DREP_ATTR_ID cpu_to_be16(0x0016) #define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017) #define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018) #define CM_LAP_ATTR_ID cpu_to_be16(0x0019) #define CM_APR_ATTR_ID cpu_to_be16(0x001A) /** * ib_cm_handler - User-defined callback to process communication events. * @cm_id: Communication identifier associated with the reported event. * @event: Information about the communication event. * * IB_CM_REQ_RECEIVED and IB_CM_SIDR_REQ_RECEIVED communication events * generated as a result of listen requests result in the allocation of a * new @cm_id. The new @cm_id is returned to the user through this callback. * Clients are responsible for destroying the new @cm_id. For peer-to-peer * IB_CM_REQ_RECEIVED and all other events, the returned @cm_id corresponds * to a user's existing communication identifier. * * Users may not call ib_destroy_cm_id while in the context of this callback; * however, returning a non-zero value instructs the communication manager to * destroy the @cm_id after the callback completes. */ typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id, struct ib_cm_event *event); struct ib_cm_id { ib_cm_handler cm_handler; void *context; struct ib_device *device; __be64 service_id; __be64 service_mask; enum ib_cm_state state; /* internal CM/debug use */ enum ib_cm_lap_state lap_state; /* internal CM/debug use */ __be32 local_id; __be32 remote_id; u32 remote_cm_qpn; /* 1 unless redirected */ }; /** * ib_create_cm_id - Allocate a communication identifier. * @device: Device associated with the cm_id. All related communication will * be associated with the specified device. * @cm_handler: Callback invoked to notify the user of CM events. * @context: User specified context associated with the communication * identifier. * * Communication identifiers are used to track connection states, service * ID resolution requests, and listen requests. */ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, ib_cm_handler cm_handler, void *context); /** * ib_destroy_cm_id - Destroy a connection identifier. * @cm_id: Connection identifier to destroy. * * This call blocks until the connection identifier is destroyed. */ void ib_destroy_cm_id(struct ib_cm_id *cm_id); #define IB_SERVICE_ID_AGN_MASK cpu_to_be64(0xFF00000000000000ULL) #define IB_CM_ASSIGN_SERVICE_ID cpu_to_be64(0x0200000000000000ULL) #define IB_CMA_SERVICE_ID cpu_to_be64(0x0000000001000000ULL) #define IB_CMA_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFF000000ULL) #define IB_SDP_SERVICE_ID cpu_to_be64(0x0000000000010000ULL) #define IB_SDP_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFFFF0000ULL) /** * ib_cm_listen - Initiates listening on the specified service ID for * connection and service ID resolution requests. * @cm_id: Connection identifier associated with the listen request. * @service_id: Service identifier matched against incoming connection * and service ID resolution requests. The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. * @service_mask: Mask applied to service ID used to listen across a * range of service IDs. If set to 0, the service ID is matched * exactly. This parameter is ignored if %service_id is set to * IB_CM_ASSIGN_SERVICE_ID. */ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask); struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, ib_cm_handler cm_handler, __be64 service_id); struct ib_cm_req_param { struct ib_sa_path_rec *primary_path; struct ib_sa_path_rec *alternate_path; __be64 service_id; u32 qp_num; enum ib_qp_type qp_type; u32 starting_psn; const void *private_data; u8 private_data_len; u8 peer_to_peer; u8 responder_resources; u8 initiator_depth; u8 remote_cm_response_timeout; u8 flow_control; u8 local_cm_response_timeout; u8 retry_count; u8 rnr_retry_count; u8 max_cm_retries; u8 srq; }; /** * ib_send_cm_req - Sends a connection request to the remote node. * @cm_id: Connection identifier that will be associated with the * connection request. * @param: Connection request information needed to establish the * connection. */ int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param); struct ib_cm_rep_param { u32 qp_num; u32 starting_psn; const void *private_data; u8 private_data_len; u8 responder_resources; u8 initiator_depth; u8 failover_accepted; u8 flow_control; u8 rnr_retry_count; u8 srq; }; /** * ib_send_cm_rep - Sends a connection reply in response to a connection * request. * @cm_id: Connection identifier that will be associated with the * connection request. * @param: Connection reply information needed to establish the * connection. */ int ib_send_cm_rep(struct ib_cm_id *cm_id, struct ib_cm_rep_param *param); /** * ib_send_cm_rtu - Sends a connection ready to use message in response * to a connection reply message. * @cm_id: Connection identifier associated with the connection request. * @private_data: Optional user-defined private data sent with the * ready to use message. * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_rtu(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len); /** * ib_send_cm_dreq - Sends a disconnection request for an existing * connection. * @cm_id: Connection identifier associated with the connection being * released. * @private_data: Optional user-defined private data sent with the * disconnection request message. * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len); /** * ib_send_cm_drep - Sends a disconnection reply to a disconnection request. * @cm_id: Connection identifier associated with the connection being * released. * @private_data: Optional user-defined private data sent with the * disconnection reply message. * @private_data_len: Size of the private data buffer, in bytes. * * If the cm_id is in the correct state, the CM will transition the connection * to the timewait state, even if an error occurs sending the DREP message. */ int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len); /** * ib_cm_notify - Notifies the CM of an event reported to the consumer. * @cm_id: Connection identifier to transition to established. * @event: Type of event. * * This routine should be invoked by users to notify the CM of relevant * communication events. Events that should be reported to the CM and * when to report them are: * * IB_EVENT_COMM_EST - Used when a message is received on a connected * QP before an RTU has been received. * IB_EVENT_PATH_MIG - Notifies the CM that the connection has failed over * to the alternate path. */ int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event); /** * ib_send_cm_rej - Sends a connection rejection message to the * remote node. * @cm_id: Connection identifier associated with the connection being * rejected. * @reason: Reason for the connection request rejection. * @ari: Optional additional rejection information. * @ari_length: Size of the additional rejection information, in bytes. * @private_data: Optional user-defined private data sent with the * rejection message. * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason, void *ari, u8 ari_length, const void *private_data, u8 private_data_len); #define IB_CM_MRA_FLAG_DELAY 0x80 /* Send MRA only after a duplicate msg */ /** * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection * message. * @cm_id: Connection identifier associated with the connection message. * @service_timeout: The lower 5-bits specify the maximum time required for * the sender to reply to the connection message. The upper 3-bits * specify additional control flags. * @private_data: Optional user-defined private data sent with the * message receipt acknowledgement. * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_mra(struct ib_cm_id *cm_id, u8 service_timeout, const void *private_data, u8 private_data_len); /** * ib_send_cm_lap - Sends a load alternate path request. * @cm_id: Connection identifier associated with the load alternate path * message. * @alternate_path: A path record that identifies the alternate path to * load. * @private_data: Optional user-defined private data sent with the * load alternate path message. * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_lap(struct ib_cm_id *cm_id, struct ib_sa_path_rec *alternate_path, const void *private_data, u8 private_data_len); /** * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning * to a specified QP state. * @cm_id: Communication identifier associated with the QP attributes to * initialize. * @qp_attr: On input, specifies the desired QP state. On output, the * mandatory and desired optional attributes will be set in order to * modify the QP to the specified state. * @qp_attr_mask: The QP attribute mask that may be used to transition the * QP to the specified state. * * Users must set the @qp_attr->qp_state to the desired QP state. This call * will set all required attributes for the given transition, along with * known optional attributes. Users may override the attributes returned from * this call before calling ib_modify_qp. */ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask); /** * ib_send_cm_apr - Sends an alternate path response message in response to * a load alternate path request. * @cm_id: Connection identifier associated with the alternate path response. * @status: Reply status sent with the alternate path response. * @info: Optional additional information sent with the alternate path * response. * @info_length: Size of the additional information, in bytes. * @private_data: Optional user-defined private data sent with the * alternate path response message. * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_apr(struct ib_cm_id *cm_id, enum ib_cm_apr_status status, void *info, u8 info_length, const void *private_data, u8 private_data_len); struct ib_cm_sidr_req_param { struct ib_sa_path_rec *path; __be64 service_id; int timeout_ms; const void *private_data; u8 private_data_len; u8 max_cm_retries; }; /** * ib_send_cm_sidr_req - Sends a service ID resolution request to the * remote node. * @cm_id: Communication identifier that will be associated with the * service ID resolution request. * @param: Service ID resolution request information. */ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, struct ib_cm_sidr_req_param *param); struct ib_cm_sidr_rep_param { u32 qp_num; u32 qkey; enum ib_cm_sidr_status status; const void *info; u8 info_length; const void *private_data; u8 private_data_len; }; /** * ib_send_cm_sidr_rep - Sends a service ID resolution reply to the * remote node. * @cm_id: Communication identifier associated with the received service ID * resolution request. * @param: Service ID resolution reply information. */ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_param *param); #endif /* IB_CM_H */