Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/multicast.c
===================================================================
--- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/multicast.c	(revision 320591)
+++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/multicast.c	(nonexistent)
@@ -1,900 +0,0 @@
-/*
- * Copyright (c) 2006 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#define LINUXKPI_PARAM_PREFIX ibcore_
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include "sa.h"
-
-static void mcast_add_one(struct ib_device *device);
-static void mcast_remove_one(struct ib_device *device, void *client_data);
-
-static struct ib_client mcast_client = {
-	.name = "ib_multicast",
-	.add = mcast_add_one,
-	.remove = mcast_remove_one
-};
-
-static struct ib_sa_client sa_client;
-static struct workqueue_struct *mcast_wq;
-static union ib_gid mgid0;
-
-struct mcast_device;
-
-struct mcast_port {
-	struct mcast_device *dev;
-	spinlock_t lock;
-	struct rb_root table;
-	atomic_t refcount;
-	struct completion comp;
-	u8 port_num;
-};
-
-struct mcast_device {
-	struct ib_device *device;
-	struct ib_event_handler event_handler;
-	int start_port;
-	int end_port;
-	struct mcast_port port[0];
-};
-
-enum mcast_state {
-	MCAST_JOINING,
-	MCAST_MEMBER,
-	MCAST_ERROR,
-};
-
-enum mcast_group_state {
-	MCAST_IDLE,
-	MCAST_BUSY,
-	MCAST_GROUP_ERROR,
-	MCAST_PKEY_EVENT
-};
-
-enum {
-	MCAST_INVALID_PKEY_INDEX = 0xFFFF
-};
-
-struct mcast_member;
-
-struct mcast_group {
-	struct ib_sa_mcmember_rec rec;
-	struct rb_node node;
-	struct mcast_port *port;
-	spinlock_t lock;
-	struct work_struct work;
-	struct list_head pending_list;
-	struct list_head active_list;
-	struct mcast_member *last_join;
-	int members[NUM_JOIN_MEMBERSHIP_TYPES];
-	atomic_t refcount;
-	enum mcast_group_state state;
-	struct ib_sa_query *query;
-	u16 pkey_index;
-	u8 leave_state;
-	int retries;
-};
-
-struct mcast_member {
-	struct ib_sa_multicast multicast;
-	struct ib_sa_client *client;
-	struct mcast_group *group;
-	struct list_head list;
-	enum mcast_state state;
-	atomic_t refcount;
-	struct completion comp;
-};
-
-static void join_handler(int status, struct 
ib_sa_mcmember_rec *rec, - void *context); -static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, - void *context); - -static struct mcast_group *mcast_find(struct mcast_port *port, - union ib_gid *mgid) -{ - struct rb_node *node = port->table.rb_node; - struct mcast_group *group; - int ret; - - while (node) { - group = rb_entry(node, struct mcast_group, node); - ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid); - if (!ret) - return group; - - if (ret < 0) - node = node->rb_left; - else - node = node->rb_right; - } - return NULL; -} - -static struct mcast_group *mcast_insert(struct mcast_port *port, - struct mcast_group *group, - int allow_duplicates) -{ - struct rb_node **link = &port->table.rb_node; - struct rb_node *parent = NULL; - struct mcast_group *cur_group; - int ret; - - while (*link) { - parent = *link; - cur_group = rb_entry(parent, struct mcast_group, node); - - ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw, - sizeof group->rec.mgid); - if (ret < 0) - link = &(*link)->rb_left; - else if (ret > 0) - link = &(*link)->rb_right; - else if (allow_duplicates) - link = &(*link)->rb_left; - else - return cur_group; - } - rb_link_node(&group->node, parent, link); - rb_insert_color(&group->node, &port->table); - return NULL; -} - -static void deref_port(struct mcast_port *port) -{ - if (atomic_dec_and_test(&port->refcount)) - complete(&port->comp); -} - -static void release_group(struct mcast_group *group) -{ - struct mcast_port *port = group->port; - unsigned long flags; - - spin_lock_irqsave(&port->lock, flags); - if (atomic_dec_and_test(&group->refcount)) { - rb_erase(&group->node, &port->table); - spin_unlock_irqrestore(&port->lock, flags); - kfree(group); - deref_port(port); - } else - spin_unlock_irqrestore(&port->lock, flags); -} - -static void deref_member(struct mcast_member *member) -{ - if (atomic_dec_and_test(&member->refcount)) - complete(&member->comp); -} - -static void queue_join(struct mcast_member *member) -{ - struct mcast_group *group = member->group; - unsigned long flags; - - spin_lock_irqsave(&group->lock, flags); - list_add_tail(&member->list, &group->pending_list); - if (group->state == MCAST_IDLE) { - group->state = MCAST_BUSY; - atomic_inc(&group->refcount); - queue_work(mcast_wq, &group->work); - } - spin_unlock_irqrestore(&group->lock, flags); -} - -/* - * A multicast group has four types of members: full member, non member, - * sendonly non member and sendonly full member. - * We need to keep track of the number of members of each - * type based on their join state. Adjust the number of members the belong to - * the specified join states. - */ -static void adjust_membership(struct mcast_group *group, u8 join_state, int inc) -{ - int i; - - for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1) - if (join_state & 0x1) - group->members[i] += inc; -} - -/* - * If a multicast group has zero members left for a particular join state, but - * the group is still a member with the SA, we need to leave that join state. - * Determine which join states we still belong to, but that do not have any - * active members. 
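(Annotation, not part of the deleted source: the two comments above describe the bit-per-membership-type bookkeeping that adjust_membership() and get_leave_state() implement. A minimal standalone C sketch of the same counting scheme; NUM_TYPES and the function names here are hypothetical stand-ins for NUM_JOIN_MEMBERSHIP_TYPES and the driver routines.)

#include <stdio.h>

/* One counter per JoinState bit: full member, non member,
 * send-only non member, send-only full member. */
enum { NUM_TYPES = 4 };
static int members[NUM_TYPES];

/* Mirror of adjust_membership(): bump every counter whose bit
 * is set in join_state. */
static void adjust(unsigned join_state, int inc)
{
	for (int i = 0; i < NUM_TYPES; i++, join_state >>= 1)
		if (join_state & 0x1)
			members[i] += inc;
}

/* Mirror of get_leave_state(): bits the group joined with but
 * that no longer have any members must be left at the SA. */
static unsigned leave_state(unsigned group_join_state)
{
	unsigned leave = 0;

	for (int i = 0; i < NUM_TYPES; i++)
		if (!members[i])
			leave |= 1u << i;
	return leave & group_join_state;
}

int main(void)
{
	adjust(0x1, 1);		/* a full member joins */
	adjust(0x4, 1);		/* a send-only non member joins */
	adjust(0x4, -1);	/* ...and leaves again */
	printf("leave 0x%x\n", leave_state(0x5));	/* prints 0x4 */
	return 0;
}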
- */ -static u8 get_leave_state(struct mcast_group *group) -{ - u8 leave_state = 0; - int i; - - for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++) - if (!group->members[i]) - leave_state |= (0x1 << i); - - return leave_state & group->rec.join_state; -} - -static int check_selector(ib_sa_comp_mask comp_mask, - ib_sa_comp_mask selector_mask, - ib_sa_comp_mask value_mask, - u8 selector, u8 src_value, u8 dst_value) -{ - int err; - - if (!(comp_mask & selector_mask) || !(comp_mask & value_mask)) - return 0; - - switch (selector) { - case IB_SA_GT: - err = (src_value <= dst_value); - break; - case IB_SA_LT: - err = (src_value >= dst_value); - break; - case IB_SA_EQ: - err = (src_value != dst_value); - break; - default: - err = 0; - break; - } - - return err; -} - -static int cmp_rec(struct ib_sa_mcmember_rec *src, - struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask) -{ - /* MGID must already match */ - - if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID && - memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid)) - return -EINVAL; - if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey) - return -EINVAL; - if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid) - return -EINVAL; - if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR, - IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector, - src->mtu, dst->mtu)) - return -EINVAL; - if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS && - src->traffic_class != dst->traffic_class) - return -EINVAL; - if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey) - return -EINVAL; - if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR, - IB_SA_MCMEMBER_REC_RATE, dst->rate_selector, - src->rate, dst->rate)) - return -EINVAL; - if (check_selector(comp_mask, - IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR, - IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME, - dst->packet_life_time_selector, - src->packet_life_time, dst->packet_life_time)) - return -EINVAL; - if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl) - return -EINVAL; - if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL && - src->flow_label != dst->flow_label) - return -EINVAL; - if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT && - src->hop_limit != dst->hop_limit) - return -EINVAL; - if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope) - return -EINVAL; - - /* join_state checked separately, proxy_join ignored */ - - return 0; -} - -static int send_join(struct mcast_group *group, struct mcast_member *member) -{ - struct mcast_port *port = group->port; - int ret; - - group->last_join = member; - ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, - port->port_num, IB_MGMT_METHOD_SET, - &member->multicast.rec, - member->multicast.comp_mask, - 3000, GFP_KERNEL, join_handler, group, - &group->query); - return (ret > 0) ? 0 : ret; -} - -static int send_leave(struct mcast_group *group, u8 leave_state) -{ - struct mcast_port *port = group->port; - struct ib_sa_mcmember_rec rec; - int ret; - - rec = group->rec; - rec.join_state = leave_state; - group->leave_state = leave_state; - - ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, - port->port_num, IB_SA_METHOD_DELETE, &rec, - IB_SA_MCMEMBER_REC_MGID | - IB_SA_MCMEMBER_REC_PORT_GID | - IB_SA_MCMEMBER_REC_JOIN_STATE, - 3000, GFP_KERNEL, leave_handler, - group, &group->query); - return (ret > 0) ? 
0 : ret; -} - -static void join_group(struct mcast_group *group, struct mcast_member *member, - u8 join_state) -{ - member->state = MCAST_MEMBER; - adjust_membership(group, join_state, 1); - group->rec.join_state |= join_state; - member->multicast.rec = group->rec; - member->multicast.rec.join_state = join_state; - list_move(&member->list, &group->active_list); -} - -static int fail_join(struct mcast_group *group, struct mcast_member *member, - int status) -{ - spin_lock_irq(&group->lock); - list_del_init(&member->list); - spin_unlock_irq(&group->lock); - return member->multicast.callback(status, &member->multicast); -} - -static void process_group_error(struct mcast_group *group) -{ - struct mcast_member *member; - int ret = 0; - u16 pkey_index; - - if (group->state == MCAST_PKEY_EVENT) - ret = ib_find_pkey(group->port->dev->device, - group->port->port_num, - be16_to_cpu(group->rec.pkey), &pkey_index); - - spin_lock_irq(&group->lock); - if (group->state == MCAST_PKEY_EVENT && !ret && - group->pkey_index == pkey_index) - goto out; - - while (!list_empty(&group->active_list)) { - member = list_entry(group->active_list.next, - struct mcast_member, list); - atomic_inc(&member->refcount); - list_del_init(&member->list); - adjust_membership(group, member->multicast.rec.join_state, -1); - member->state = MCAST_ERROR; - spin_unlock_irq(&group->lock); - - ret = member->multicast.callback(-ENETRESET, - &member->multicast); - deref_member(member); - if (ret) - ib_sa_free_multicast(&member->multicast); - spin_lock_irq(&group->lock); - } - - group->rec.join_state = 0; -out: - group->state = MCAST_BUSY; - spin_unlock_irq(&group->lock); -} - -static void mcast_work_handler(struct work_struct *work) -{ - struct mcast_group *group; - struct mcast_member *member; - struct ib_sa_multicast *multicast; - int status, ret; - u8 join_state; - - group = container_of(work, typeof(*group), work); -retest: - spin_lock_irq(&group->lock); - while (!list_empty(&group->pending_list) || - (group->state != MCAST_BUSY)) { - - if (group->state != MCAST_BUSY) { - spin_unlock_irq(&group->lock); - process_group_error(group); - goto retest; - } - - member = list_entry(group->pending_list.next, - struct mcast_member, list); - multicast = &member->multicast; - join_state = multicast->rec.join_state; - atomic_inc(&member->refcount); - - if (join_state == (group->rec.join_state & join_state)) { - status = cmp_rec(&group->rec, &multicast->rec, - multicast->comp_mask); - if (!status) - join_group(group, member, join_state); - else - list_del_init(&member->list); - spin_unlock_irq(&group->lock); - ret = multicast->callback(status, multicast); - } else { - spin_unlock_irq(&group->lock); - status = send_join(group, member); - if (!status) { - deref_member(member); - return; - } - ret = fail_join(group, member, status); - } - - deref_member(member); - if (ret) - ib_sa_free_multicast(&member->multicast); - spin_lock_irq(&group->lock); - } - - join_state = get_leave_state(group); - if (join_state) { - group->rec.join_state &= ~join_state; - spin_unlock_irq(&group->lock); - if (send_leave(group, join_state)) - goto retest; - } else { - group->state = MCAST_IDLE; - spin_unlock_irq(&group->lock); - release_group(group); - } -} - -/* - * Fail a join request if it is still active - at the head of the pending queue. 
- */ -static void process_join_error(struct mcast_group *group, int status) -{ - struct mcast_member *member; - int ret; - - spin_lock_irq(&group->lock); - member = list_entry(group->pending_list.next, - struct mcast_member, list); - if (group->last_join == member) { - atomic_inc(&member->refcount); - list_del_init(&member->list); - spin_unlock_irq(&group->lock); - ret = member->multicast.callback(status, &member->multicast); - deref_member(member); - if (ret) - ib_sa_free_multicast(&member->multicast); - } else - spin_unlock_irq(&group->lock); -} - -static void join_handler(int status, struct ib_sa_mcmember_rec *rec, - void *context) -{ - struct mcast_group *group = context; - u16 pkey_index = MCAST_INVALID_PKEY_INDEX; - - if (status) - process_join_error(group, status); - else { - int mgids_changed, is_mgid0; - ib_find_pkey(group->port->dev->device, group->port->port_num, - be16_to_cpu(rec->pkey), &pkey_index); - - spin_lock_irq(&group->port->lock); - if (group->state == MCAST_BUSY && - group->pkey_index == MCAST_INVALID_PKEY_INDEX) - group->pkey_index = pkey_index; - mgids_changed = memcmp(&rec->mgid, &group->rec.mgid, - sizeof(group->rec.mgid)); - group->rec = *rec; - if (mgids_changed) { - rb_erase(&group->node, &group->port->table); - is_mgid0 = !memcmp(&mgid0, &group->rec.mgid, - sizeof(mgid0)); - mcast_insert(group->port, group, is_mgid0); - } - spin_unlock_irq(&group->port->lock); - } - mcast_work_handler(&group->work); -} - -static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, - void *context) -{ - struct mcast_group *group = context; - - if (status && group->retries > 0 && - !send_leave(group, group->leave_state)) - group->retries--; - else - mcast_work_handler(&group->work); -} - -static struct mcast_group *acquire_group(struct mcast_port *port, - union ib_gid *mgid, gfp_t gfp_mask) -{ - struct mcast_group *group, *cur_group; - unsigned long flags; - int is_mgid0; - - is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0); - if (!is_mgid0) { - spin_lock_irqsave(&port->lock, flags); - group = mcast_find(port, mgid); - if (group) - goto found; - spin_unlock_irqrestore(&port->lock, flags); - } - - group = kzalloc(sizeof *group, gfp_mask); - if (!group) - return NULL; - - group->retries = 3; - group->port = port; - group->rec.mgid = *mgid; - group->pkey_index = MCAST_INVALID_PKEY_INDEX; - INIT_LIST_HEAD(&group->pending_list); - INIT_LIST_HEAD(&group->active_list); - INIT_WORK(&group->work, mcast_work_handler); - spin_lock_init(&group->lock); - - spin_lock_irqsave(&port->lock, flags); - cur_group = mcast_insert(port, group, is_mgid0); - if (cur_group) { - kfree(group); - group = cur_group; - } else - atomic_inc(&port->refcount); -found: - atomic_inc(&group->refcount); - spin_unlock_irqrestore(&port->lock, flags); - return group; -} - -/* - * We serialize all join requests to a single group to make our lives much - * easier. Otherwise, two users could try to join the same group - * simultaneously, with different configurations, one could leave while the - * join is in progress, etc., which makes locking around error recovery - * difficult. 
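(Annotation, not part of the deleted source: a single-threaded toy model of the IDLE/BUSY gate that enforces the serialization described above — only the caller that observes the group IDLE schedules the worker, and every later caller merely enqueues and lets the running worker drain the list. Names are hypothetical; the real spinlock, refcount, and workqueue are elided.)

#include <stdio.h>

enum state { IDLE, BUSY };

static enum state group_state = IDLE;
static int pending;	/* stand-in for the length of pending_list */

/* Stand-in for mcast_work_handler(): drain all queued joins,
 * then drop back to IDLE. */
static void process_pending(void)
{
	while (pending > 0)
		printf("worker: processed one join, %d left\n", --pending);
	group_state = IDLE;
}

/* Stand-in for queue_join(): enqueue, and schedule the worker
 * only on the IDLE -> BUSY transition.  In the driver the worker
 * runs asynchronously, so joins arriving while BUSY just queue. */
static void queue_join(void)
{
	pending++;
	if (group_state == IDLE) {
		group_state = BUSY;
		process_pending();	/* queue_work(mcast_wq, ...) in the driver */
	}
}

int main(void)
{
	queue_join();
	queue_join();
	return 0;
}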
- */ -struct ib_sa_multicast * -ib_sa_join_multicast(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - struct ib_sa_mcmember_rec *rec, - ib_sa_comp_mask comp_mask, gfp_t gfp_mask, - int (*callback)(int status, - struct ib_sa_multicast *multicast), - void *context) -{ - struct mcast_device *dev; - struct mcast_member *member; - struct ib_sa_multicast *multicast; - int ret; - - dev = ib_get_client_data(device, &mcast_client); - if (!dev) - return ERR_PTR(-ENODEV); - - member = kmalloc(sizeof *member, gfp_mask); - if (!member) - return ERR_PTR(-ENOMEM); - - ib_sa_client_get(client); - member->client = client; - member->multicast.rec = *rec; - member->multicast.comp_mask = comp_mask; - member->multicast.callback = callback; - member->multicast.context = context; - init_completion(&member->comp); - atomic_set(&member->refcount, 1); - member->state = MCAST_JOINING; - - member->group = acquire_group(&dev->port[port_num - dev->start_port], - &rec->mgid, gfp_mask); - if (!member->group) { - ret = -ENOMEM; - goto err; - } - - /* - * The user will get the multicast structure in their callback. They - * could then free the multicast structure before we can return from - * this routine. So we save the pointer to return before queuing - * any callback. - */ - multicast = &member->multicast; - queue_join(member); - return multicast; - -err: - ib_sa_client_put(client); - kfree(member); - return ERR_PTR(ret); -} -EXPORT_SYMBOL(ib_sa_join_multicast); - -void ib_sa_free_multicast(struct ib_sa_multicast *multicast) -{ - struct mcast_member *member; - struct mcast_group *group; - - member = container_of(multicast, struct mcast_member, multicast); - group = member->group; - - spin_lock_irq(&group->lock); - if (member->state == MCAST_MEMBER) - adjust_membership(group, multicast->rec.join_state, -1); - - list_del_init(&member->list); - - if (group->state == MCAST_IDLE) { - group->state = MCAST_BUSY; - spin_unlock_irq(&group->lock); - /* Continue to hold reference on group until callback */ - queue_work(mcast_wq, &group->work); - } else { - spin_unlock_irq(&group->lock); - release_group(group); - } - - deref_member(member); - wait_for_completion(&member->comp); - ib_sa_client_put(member->client); - kfree(member); -} -EXPORT_SYMBOL(ib_sa_free_multicast); - -int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, - union ib_gid *mgid, struct ib_sa_mcmember_rec *rec) -{ - struct mcast_device *dev; - struct mcast_port *port; - struct mcast_group *group; - unsigned long flags; - int ret = 0; - - dev = ib_get_client_data(device, &mcast_client); - if (!dev) - return -ENODEV; - - port = &dev->port[port_num - dev->start_port]; - spin_lock_irqsave(&port->lock, flags); - group = mcast_find(port, mgid); - if (group) - *rec = group->rec; - else - ret = -EADDRNOTAVAIL; - spin_unlock_irqrestore(&port->lock, flags); - - return ret; -} -EXPORT_SYMBOL(ib_sa_get_mcmember_rec); - -int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, - struct ib_sa_mcmember_rec *rec, - struct net_device *ndev, - enum ib_gid_type gid_type, - struct ib_ah_attr *ah_attr) -{ - int ret; - u16 gid_index; - u8 p; - - if (rdma_protocol_roce(device, port_num)) { - ret = ib_find_cached_gid_by_port(device, &rec->port_gid, - gid_type, port_num, - ndev, - &gid_index); - } else if (rdma_protocol_ib(device, port_num)) { - ret = ib_find_cached_gid(device, &rec->port_gid, - IB_GID_TYPE_IB, NULL, &p, - &gid_index); - } else { - ret = -EINVAL; - } - - if (ret) - return ret; - - memset(ah_attr, 0, sizeof *ah_attr); - 
ah_attr->dlid = be16_to_cpu(rec->mlid); - ah_attr->sl = rec->sl; - ah_attr->port_num = port_num; - ah_attr->static_rate = rec->rate; - - ah_attr->ah_flags = IB_AH_GRH; - ah_attr->grh.dgid = rec->mgid; - - ah_attr->grh.sgid_index = (u8) gid_index; - ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); - ah_attr->grh.hop_limit = rec->hop_limit; - ah_attr->grh.traffic_class = rec->traffic_class; - - return 0; -} -EXPORT_SYMBOL(ib_init_ah_from_mcmember); - -static void mcast_groups_event(struct mcast_port *port, - enum mcast_group_state state) -{ - struct mcast_group *group; - struct rb_node *node; - unsigned long flags; - - spin_lock_irqsave(&port->lock, flags); - for (node = rb_first(&port->table); node; node = rb_next(node)) { - group = rb_entry(node, struct mcast_group, node); - spin_lock(&group->lock); - if (group->state == MCAST_IDLE) { - atomic_inc(&group->refcount); - queue_work(mcast_wq, &group->work); - } - if (group->state != MCAST_GROUP_ERROR) - group->state = state; - spin_unlock(&group->lock); - } - spin_unlock_irqrestore(&port->lock, flags); -} - -static void mcast_event_handler(struct ib_event_handler *handler, - struct ib_event *event) -{ - struct mcast_device *dev; - int index; - - dev = container_of(handler, struct mcast_device, event_handler); - if (!rdma_cap_ib_mcast(dev->device, event->element.port_num)) - return; - - index = event->element.port_num - dev->start_port; - - switch (event->event) { - case IB_EVENT_PORT_ERR: - case IB_EVENT_LID_CHANGE: - case IB_EVENT_SM_CHANGE: - case IB_EVENT_CLIENT_REREGISTER: - mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR); - break; - case IB_EVENT_PKEY_CHANGE: - mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT); - break; - default: - break; - } -} - -static void mcast_add_one(struct ib_device *device) -{ - struct mcast_device *dev; - struct mcast_port *port; - int i; - int count = 0; - - dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port, - GFP_KERNEL); - if (!dev) - return; - - dev->start_port = rdma_start_port(device); - dev->end_port = rdma_end_port(device); - - for (i = 0; i <= dev->end_port - dev->start_port; i++) { - if (!rdma_cap_ib_mcast(device, dev->start_port + i)) - continue; - port = &dev->port[i]; - port->dev = dev; - port->port_num = dev->start_port + i; - spin_lock_init(&port->lock); - port->table = RB_ROOT; - init_completion(&port->comp); - atomic_set(&port->refcount, 1); - ++count; - } - - if (!count) { - kfree(dev); - return; - } - - dev->device = device; - ib_set_client_data(device, &mcast_client, dev); - - INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler); - ib_register_event_handler(&dev->event_handler); -} - -static void mcast_remove_one(struct ib_device *device, void *client_data) -{ - struct mcast_device *dev = client_data; - struct mcast_port *port; - int i; - - if (!dev) - return; - - ib_unregister_event_handler(&dev->event_handler); - flush_workqueue(mcast_wq); - - for (i = 0; i <= dev->end_port - dev->start_port; i++) { - if (rdma_cap_ib_mcast(device, dev->start_port + i)) { - port = &dev->port[i]; - deref_port(port); - wait_for_completion(&port->comp); - } - } - - kfree(dev); -} - -int mcast_init(void) -{ - int ret; - - mcast_wq = alloc_ordered_workqueue("ib_mcast", WQ_MEM_RECLAIM); - if (!mcast_wq) - return -ENOMEM; - - ib_sa_register_client(&sa_client); - - ret = ib_register_client(&mcast_client); - if (ret) - goto err; - return 0; - -err: - ib_sa_unregister_client(&sa_client); - destroy_workqueue(mcast_wq); - return ret; -} - -void mcast_cleanup(void) 
-{ - ib_unregister_client(&mcast_client); - ib_sa_unregister_client(&sa_client); - destroy_workqueue(mcast_wq); -} Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/multicast.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/user_mad.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/user_mad.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/user_mad.c (nonexistent) @@ -1,1404 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2008 Cisco. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#define pr_fmt(fmt) "user_mad: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -MODULE_AUTHOR("Roland Dreier"); -MODULE_DESCRIPTION("InfiniBand userspace MAD packet access"); -MODULE_LICENSE("Dual BSD/GPL"); - -enum { - IB_UMAD_MAX_PORTS = 64, - IB_UMAD_MAX_AGENTS = 32, - - IB_UMAD_MAJOR = 231, - IB_UMAD_MINOR_BASE = 0 -}; - -/* - * Our lifetime rules for these structs are the following: - * device special file is opened, we take a reference on the - * ib_umad_port's struct ib_umad_device. We drop these - * references in the corresponding close(). - * - * In addition to references coming from open character devices, there - * is one more reference to each ib_umad_device representing the - * module's reference taken when allocating the ib_umad_device in - * ib_umad_add_one(). - * - * When destroying an ib_umad_device, we drop the module's reference. 
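(Annotation, not part of the deleted source: both files above tear down with the same reference-count-plus-completion idiom — deref_port()/wait_for_completion() in multicast.c, and the kobject reference described in the comment above for user_mad.c — where the last dropped reference signals a completion the remover is sleeping on. A hypothetical userspace analog using C11 atomics:)

#include <stdio.h>
#include <stdatomic.h>

struct port {
	atomic_int refcount;
	int completed;		/* stand-in for struct completion */
};

/* Analog of deref_port(): whoever drops the final reference
 * signals the completion, unblocking the teardown path. */
static void deref(struct port *p)
{
	if (atomic_fetch_sub(&p->refcount, 1) == 1)
		p->completed = 1;	/* complete(&port->comp) */
}

int main(void)
{
	struct port p = { .refcount = 2, .completed = 0 };

	deref(&p);	/* a user drops its reference */
	deref(&p);	/* the module drops its own reference */
	printf("safe to free: %d\n", p.completed);	/* prints 1 */
	return 0;
}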
- */ - -struct ib_umad_port { - struct cdev cdev; - struct device *dev; - - struct cdev sm_cdev; - struct device *sm_dev; - struct semaphore sm_sem; - - struct mutex file_mutex; - struct list_head file_list; - - struct ib_device *ib_dev; - struct ib_umad_device *umad_dev; - int dev_num; - u8 port_num; -}; - -struct ib_umad_device { - struct kobject kobj; - struct ib_umad_port port[0]; -}; - -struct ib_umad_file { - struct mutex mutex; - struct ib_umad_port *port; - struct file *filp; - struct list_head recv_list; - struct list_head send_list; - struct list_head port_list; - spinlock_t send_lock; - wait_queue_head_t recv_wait; - struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; - int agents_dead; - u8 use_pkey_index; - u8 already_used; -}; - -struct ib_umad_packet { - struct ib_mad_send_buf *msg; - struct ib_mad_recv_wc *recv_wc; - struct list_head list; - int length; - struct ib_user_mad mad; -}; - -static struct class *umad_class; - -static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); - -static DEFINE_SPINLOCK(port_lock); -static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); - -static void ib_umad_add_one(struct ib_device *device); -static void ib_umad_remove_one(struct ib_device *device, void *client_data); - -static void ib_umad_release_dev(struct kobject *kobj) -{ - struct ib_umad_device *dev = - container_of(kobj, struct ib_umad_device, kobj); - - kfree(dev); -} - -static struct kobj_type ib_umad_dev_ktype = { - .release = ib_umad_release_dev, -}; - -static int hdr_size(struct ib_umad_file *file) -{ - return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : - sizeof (struct ib_user_mad_hdr_old); -} - -/* caller must hold file->mutex */ -static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) -{ - return file->agents_dead ? 
NULL : file->agent[id]; -} - -static int queue_packet(struct ib_umad_file *file, - struct ib_mad_agent *agent, - struct ib_umad_packet *packet) -{ - int ret = 1; - - mutex_lock(&file->mutex); - - for (packet->mad.hdr.id = 0; - packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; - packet->mad.hdr.id++) - if (agent == __get_agent(file, packet->mad.hdr.id)) { - list_add_tail(&packet->list, &file->recv_list); - wake_up_interruptible(&file->recv_wait); - linux_poll_wakeup(file->filp); - ret = 0; - break; - } - - mutex_unlock(&file->mutex); - - return ret; -} - -static void dequeue_send(struct ib_umad_file *file, - struct ib_umad_packet *packet) -{ - spin_lock_irq(&file->send_lock); - list_del(&packet->list); - spin_unlock_irq(&file->send_lock); -} - -static void send_handler(struct ib_mad_agent *agent, - struct ib_mad_send_wc *send_wc) -{ - struct ib_umad_file *file = agent->context; - struct ib_umad_packet *packet = send_wc->send_buf->context[0]; - - dequeue_send(file, packet); - ib_destroy_ah(packet->msg->ah); - ib_free_send_mad(packet->msg); - - if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { - packet->length = IB_MGMT_MAD_HDR; - packet->mad.hdr.status = ETIMEDOUT; - if (!queue_packet(file, agent, packet)) - return; - } - kfree(packet); -} - -static void recv_handler(struct ib_mad_agent *agent, - struct ib_mad_send_buf *send_buf, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_umad_file *file = agent->context; - struct ib_umad_packet *packet; - - if (mad_recv_wc->wc->status != IB_WC_SUCCESS) - goto err1; - - packet = kzalloc(sizeof *packet, GFP_KERNEL); - if (!packet) - goto err1; - - packet->length = mad_recv_wc->mad_len; - packet->recv_wc = mad_recv_wc; - - packet->mad.hdr.status = 0; - packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; - packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); - packet->mad.hdr.sl = mad_recv_wc->wc->sl; - packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; - packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; - packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); - if (packet->mad.hdr.grh_present) { - struct ib_ah_attr ah_attr; - - ib_init_ah_from_wc(agent->device, agent->port_num, - mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, - &ah_attr); - - packet->mad.hdr.gid_index = ah_attr.grh.sgid_index; - packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit; - packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class; - memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16); - packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label); - } - - if (queue_packet(file, agent, packet)) - goto err2; - return; - -err2: - kfree(packet); -err1: - ib_free_recv_mad(mad_recv_wc); -} - -static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, - struct ib_umad_packet *packet, size_t count) -{ - struct ib_mad_recv_buf *recv_buf; - int left, seg_payload, offset, max_seg_payload; - size_t seg_size; - - recv_buf = &packet->recv_wc->recv_buf; - seg_size = packet->recv_wc->mad_seg_size; - - /* We need enough room to copy the first (or only) MAD segment. 
*/ - if ((packet->length <= seg_size && - count < hdr_size(file) + packet->length) || - (packet->length > seg_size && - count < hdr_size(file) + seg_size)) - return -EINVAL; - - if (copy_to_user(buf, &packet->mad, hdr_size(file))) - return -EFAULT; - - buf += hdr_size(file); - seg_payload = min_t(int, packet->length, seg_size); - if (copy_to_user(buf, recv_buf->mad, seg_payload)) - return -EFAULT; - - if (seg_payload < packet->length) { - /* - * Multipacket RMPP MAD message. Copy remainder of message. - * Note that last segment may have a shorter payload. - */ - if (count < hdr_size(file) + packet->length) { - /* - * The buffer is too small, return the first RMPP segment, - * which includes the RMPP message length. - */ - return -ENOSPC; - } - offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class); - max_seg_payload = seg_size - offset; - - for (left = packet->length - seg_payload, buf += seg_payload; - left; left -= seg_payload, buf += seg_payload) { - recv_buf = container_of(recv_buf->list.next, - struct ib_mad_recv_buf, list); - seg_payload = min(left, max_seg_payload); - if (copy_to_user(buf, (char *)recv_buf->mad + offset, - seg_payload)) - return -EFAULT; - } - } - return hdr_size(file) + packet->length; -} - -static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf, - struct ib_umad_packet *packet, size_t count) -{ - ssize_t size = hdr_size(file) + packet->length; - - if (count < size) - return -EINVAL; - - if (copy_to_user(buf, &packet->mad, hdr_size(file))) - return -EFAULT; - - buf += hdr_size(file); - - if (copy_to_user(buf, packet->mad.data, packet->length)) - return -EFAULT; - - return size; -} - -static ssize_t ib_umad_read(struct file *filp, char __user *buf, - size_t count, loff_t *pos) -{ - struct ib_umad_file *file = filp->private_data; - struct ib_umad_packet *packet; - ssize_t ret; - - if (count < hdr_size(file)) - return -EINVAL; - - mutex_lock(&file->mutex); - - while (list_empty(&file->recv_list)) { - mutex_unlock(&file->mutex); - - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; - - if (wait_event_interruptible(file->recv_wait, - !list_empty(&file->recv_list))) - return -ERESTARTSYS; - - mutex_lock(&file->mutex); - } - - packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); - list_del(&packet->list); - - mutex_unlock(&file->mutex); - - if (packet->recv_wc) - ret = copy_recv_mad(file, buf, packet, count); - else - ret = copy_send_mad(file, buf, packet, count); - - if (ret < 0) { - /* Requeue packet */ - mutex_lock(&file->mutex); - list_add(&packet->list, &file->recv_list); - mutex_unlock(&file->mutex); - } else { - if (packet->recv_wc) - ib_free_recv_mad(packet->recv_wc); - kfree(packet); - } - return ret; -} - -static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf) -{ - int left, seg; - - /* Copy class specific header */ - if ((msg->hdr_len > IB_MGMT_RMPP_HDR) && - copy_from_user((char *)msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR, - msg->hdr_len - IB_MGMT_RMPP_HDR)) - return -EFAULT; - - /* All headers are in place. Copy data segments. 
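(Annotation, not part of the deleted source: a worked example of the RMPP reassembly arithmetic used by copy_recv_mad() above — the first segment is copied whole, each later segment contributes seg_size minus the class data offset, and the last segment may be short. All numeric values below are illustrative, not taken from the source.)

#include <stdio.h>

int main(void)
{
	int mad_len = 700;	/* total RMPP message length, example value */
	int seg_size = 256;	/* MAD segment size, example value */
	int offset = 36;	/* ib_get_mad_data_offset() result, example */
	int max_seg_payload = seg_size - offset;

	/* First (or only) segment is copied in full, capped at mad_len. */
	int copied = seg_size < mad_len ? seg_size : mad_len;
	int segs = 1;

	/* Remaining bytes arrive offset bytes into each further segment. */
	for (int left = mad_len - copied; left > 0; left -= max_seg_payload)
		segs++;
	printf("%d bytes arrive in %d segments\n", mad_len, segs);	/* 4 */
	return 0;
}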
*/ - for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0; - seg++, left -= msg->seg_size, buf += msg->seg_size) { - if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf, - min(left, msg->seg_size))) - return -EFAULT; - } - return 0; -} - -static int same_destination(struct ib_user_mad_hdr *hdr1, - struct ib_user_mad_hdr *hdr2) -{ - if (!hdr1->grh_present && !hdr2->grh_present) - return (hdr1->lid == hdr2->lid); - - if (hdr1->grh_present && hdr2->grh_present) - return !memcmp(hdr1->gid, hdr2->gid, 16); - - return 0; -} - -static int is_duplicate(struct ib_umad_file *file, - struct ib_umad_packet *packet) -{ - struct ib_umad_packet *sent_packet; - struct ib_mad_hdr *sent_hdr, *hdr; - - hdr = (struct ib_mad_hdr *) packet->mad.data; - list_for_each_entry(sent_packet, &file->send_list, list) { - sent_hdr = (struct ib_mad_hdr *) sent_packet->mad.data; - - if ((hdr->tid != sent_hdr->tid) || - (hdr->mgmt_class != sent_hdr->mgmt_class)) - continue; - - /* - * No need to be overly clever here. If two new operations have - * the same TID, reject the second as a duplicate. This is more - * restrictive than required by the spec. - */ - if (!ib_response_mad(hdr)) { - if (!ib_response_mad(sent_hdr)) - return 1; - continue; - } else if (!ib_response_mad(sent_hdr)) - continue; - - if (same_destination(&packet->mad.hdr, &sent_packet->mad.hdr)) - return 1; - } - - return 0; -} - -static ssize_t ib_umad_write(struct file *filp, const char __user *buf, - size_t count, loff_t *pos) -{ - struct ib_umad_file *file = filp->private_data; - struct ib_umad_packet *packet; - struct ib_mad_agent *agent; - struct ib_ah_attr ah_attr; - struct ib_ah *ah; - struct ib_rmpp_mad *rmpp_mad; - __be64 *tid; - int ret, data_len, hdr_len, copy_offset, rmpp_active; - u8 base_version; - - if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) - return -EINVAL; - - packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); - if (!packet) - return -ENOMEM; - - if (copy_from_user(&packet->mad, buf, hdr_size(file))) { - ret = -EFAULT; - goto err; - } - - if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { - ret = -EINVAL; - goto err; - } - - buf += hdr_size(file); - - if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) { - ret = -EFAULT; - goto err; - } - - mutex_lock(&file->mutex); - - agent = __get_agent(file, packet->mad.hdr.id); - if (!agent) { - ret = -EINVAL; - goto err_up; - } - - memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid); - ah_attr.sl = packet->mad.hdr.sl; - ah_attr.src_path_bits = packet->mad.hdr.path_bits; - ah_attr.port_num = file->port->port_num; - if (packet->mad.hdr.grh_present) { - ah_attr.ah_flags = IB_AH_GRH; - memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); - ah_attr.grh.sgid_index = packet->mad.hdr.gid_index; - ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); - ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; - ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; - } - - ah = ib_create_ah(agent->qp->pd, &ah_attr); - if (IS_ERR(ah)) { - ret = PTR_ERR(ah); - goto err_up; - } - - rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; - hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); - - if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) - && ib_mad_kernel_rmpp_agent(agent)) { - copy_offset = IB_MGMT_RMPP_HDR; - rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & - IB_MGMT_RMPP_FLAG_ACTIVE; - } else { - copy_offset = IB_MGMT_MAD_HDR; - rmpp_active = 0; - } - - base_version = ((struct ib_mad_hdr 
*)&packet->mad.data)->base_version; - data_len = count - hdr_size(file) - hdr_len; - packet->msg = ib_create_send_mad(agent, - be32_to_cpu(packet->mad.hdr.qpn), - packet->mad.hdr.pkey_index, rmpp_active, - hdr_len, data_len, GFP_KERNEL, - base_version); - if (IS_ERR(packet->msg)) { - ret = PTR_ERR(packet->msg); - goto err_ah; - } - - packet->msg->ah = ah; - packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; - packet->msg->retries = packet->mad.hdr.retries; - packet->msg->context[0] = packet; - - /* Copy MAD header. Any RMPP header is already in place. */ - memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); - - if (!rmpp_active) { - if (copy_from_user((char *)packet->msg->mad + copy_offset, - buf + copy_offset, - hdr_len + data_len - copy_offset)) { - ret = -EFAULT; - goto err_msg; - } - } else { - ret = copy_rmpp_mad(packet->msg, buf); - if (ret) - goto err_msg; - } - - /* - * Set the high-order part of the transaction ID to make MADs from - * different agents unique, and allow routing responses back to the - * original requestor. - */ - if (!ib_response_mad(packet->msg->mad)) { - tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; - *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | - (be64_to_cpup(tid) & 0xffffffff)); - rmpp_mad->mad_hdr.tid = *tid; - } - - if (!ib_mad_kernel_rmpp_agent(agent) - && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) - && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { - spin_lock_irq(&file->send_lock); - list_add_tail(&packet->list, &file->send_list); - spin_unlock_irq(&file->send_lock); - } else { - spin_lock_irq(&file->send_lock); - ret = is_duplicate(file, packet); - if (!ret) - list_add_tail(&packet->list, &file->send_list); - spin_unlock_irq(&file->send_lock); - if (ret) { - ret = -EINVAL; - goto err_msg; - } - } - - ret = ib_post_send_mad(packet->msg, NULL); - if (ret) - goto err_send; - - mutex_unlock(&file->mutex); - return count; - -err_send: - dequeue_send(file, packet); -err_msg: - ib_free_send_mad(packet->msg); -err_ah: - ib_destroy_ah(ah); -err_up: - mutex_unlock(&file->mutex); -err: - kfree(packet); - return ret; -} - -static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wait) -{ - struct ib_umad_file *file = filp->private_data; - - /* we will always be able to post a MAD send */ - unsigned int mask = POLLOUT | POLLWRNORM; - - poll_wait(filp, &file->recv_wait, wait); - - if (!list_empty(&file->recv_list)) - mask |= POLLIN | POLLRDNORM; - - return mask; -} - -static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, - int compat_method_mask) -{ - struct ib_user_mad_reg_req ureq; - struct ib_mad_reg_req req; - struct ib_mad_agent *agent = NULL; - int agent_id; - int ret; - - mutex_lock(&file->port->file_mutex); - mutex_lock(&file->mutex); - - if (!file->port->ib_dev) { - dev_notice(file->port->dev, - "ib_umad_reg_agent: invalid device\n"); - ret = -EPIPE; - goto out; - } - - if (copy_from_user(&ureq, arg, sizeof ureq)) { - ret = -EFAULT; - goto out; - } - - if (ureq.qpn != 0 && ureq.qpn != 1) { - dev_notice(file->port->dev, - "ib_umad_reg_agent: invalid QPN %d specified\n", - ureq.qpn); - ret = -EINVAL; - goto out; - } - - for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) - if (!__get_agent(file, agent_id)) - goto found; - - dev_notice(file->port->dev, - "ib_umad_reg_agent: Max Agents (%u) reached\n", - IB_UMAD_MAX_AGENTS); - ret = -ENOMEM; - goto out; - -found: - if (ureq.mgmt_class) { - memset(&req, 0, sizeof(req)); - req.mgmt_class = ureq.mgmt_class; - 
req.mgmt_class_version = ureq.mgmt_class_version; - memcpy(req.oui, ureq.oui, sizeof req.oui); - - if (compat_method_mask) { - u32 *umm = (u32 *) ureq.method_mask; - int i; - - for (i = 0; i < BITS_TO_LONGS(IB_MGMT_MAX_METHODS); ++i) - req.method_mask[i] = - umm[i * 2] | ((u64) umm[i * 2 + 1] << 32); - } else - memcpy(req.method_mask, ureq.method_mask, - sizeof req.method_mask); - } - - agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, - ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, - ureq.mgmt_class ? &req : NULL, - ureq.rmpp_version, - send_handler, recv_handler, file, 0); - if (IS_ERR(agent)) { - ret = PTR_ERR(agent); - agent = NULL; - goto out; - } - - if (put_user(agent_id, - (u32 __user *) ((char *)arg + offsetof(struct ib_user_mad_reg_req, id)))) { - ret = -EFAULT; - goto out; - } - - if (!file->already_used) { - file->already_used = 1; - if (!file->use_pkey_index) { - dev_warn(file->port->dev, - "process %s did not enable P_Key index support.\n", - current->comm); - dev_warn(file->port->dev, - " Documentation/infiniband/user_mad.txt has info on the new ABI.\n"); - } - } - - file->agent[agent_id] = agent; - ret = 0; - -out: - mutex_unlock(&file->mutex); - - if (ret && agent) - ib_unregister_mad_agent(agent); - - mutex_unlock(&file->port->file_mutex); - - return ret; -} - -static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) -{ - struct ib_user_mad_reg_req2 ureq; - struct ib_mad_reg_req req; - struct ib_mad_agent *agent = NULL; - int agent_id; - int ret; - - mutex_lock(&file->port->file_mutex); - mutex_lock(&file->mutex); - - if (!file->port->ib_dev) { - dev_notice(file->port->dev, - "ib_umad_reg_agent2: invalid device\n"); - ret = -EPIPE; - goto out; - } - - if (copy_from_user(&ureq, arg, sizeof(ureq))) { - ret = -EFAULT; - goto out; - } - - if (ureq.qpn != 0 && ureq.qpn != 1) { - dev_notice(file->port->dev, - "ib_umad_reg_agent2: invalid QPN %d specified\n", - ureq.qpn); - ret = -EINVAL; - goto out; - } - - if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) { - const u32 flags = IB_USER_MAD_REG_FLAGS_CAP; - dev_notice(file->port->dev, - "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n", - ureq.flags, IB_USER_MAD_REG_FLAGS_CAP); - ret = -EINVAL; - - if (put_user(flags, - (u32 __user *) ((char *)arg + offsetof(struct - ib_user_mad_reg_req2, flags)))) - ret = -EFAULT; - - goto out; - } - - for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) - if (!__get_agent(file, agent_id)) - goto found; - - dev_notice(file->port->dev, - "ib_umad_reg_agent2: Max Agents (%u) reached\n", - IB_UMAD_MAX_AGENTS); - ret = -ENOMEM; - goto out; - -found: - if (ureq.mgmt_class) { - memset(&req, 0, sizeof(req)); - req.mgmt_class = ureq.mgmt_class; - req.mgmt_class_version = ureq.mgmt_class_version; - if (ureq.oui & 0xff000000) { - dev_notice(file->port->dev, - "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n", - ureq.oui); - ret = -EINVAL; - goto out; - } - req.oui[2] = ureq.oui & 0x0000ff; - req.oui[1] = (ureq.oui & 0x00ff00) >> 8; - req.oui[0] = (ureq.oui & 0xff0000) >> 16; - memcpy(req.method_mask, ureq.method_mask, - sizeof(req.method_mask)); - } - - agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, - ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, - ureq.mgmt_class ? 
&req : NULL, - ureq.rmpp_version, - send_handler, recv_handler, file, - ureq.flags); - if (IS_ERR(agent)) { - ret = PTR_ERR(agent); - agent = NULL; - goto out; - } - - if (put_user(agent_id, - (u32 __user *)((char *)arg + - offsetof(struct ib_user_mad_reg_req2, id)))) { - ret = -EFAULT; - goto out; - } - - if (!file->already_used) { - file->already_used = 1; - file->use_pkey_index = 1; - } - - file->agent[agent_id] = agent; - ret = 0; - -out: - mutex_unlock(&file->mutex); - - if (ret && agent) - ib_unregister_mad_agent(agent); - - mutex_unlock(&file->port->file_mutex); - - return ret; -} - - -static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) -{ - struct ib_mad_agent *agent = NULL; - u32 id; - int ret = 0; - - if (get_user(id, arg)) - return -EFAULT; - - mutex_lock(&file->port->file_mutex); - mutex_lock(&file->mutex); - - if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { - ret = -EINVAL; - goto out; - } - - agent = file->agent[id]; - file->agent[id] = NULL; - -out: - mutex_unlock(&file->mutex); - - if (agent) - ib_unregister_mad_agent(agent); - - mutex_unlock(&file->port->file_mutex); - - return ret; -} - -static long ib_umad_enable_pkey(struct ib_umad_file *file) -{ - int ret = 0; - - mutex_lock(&file->mutex); - if (file->already_used) - ret = -EINVAL; - else - file->use_pkey_index = 1; - mutex_unlock(&file->mutex); - - return ret; -} - -static long ib_umad_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - switch (cmd) { - case IB_USER_MAD_REGISTER_AGENT: - return ib_umad_reg_agent(filp->private_data, (void __user *) arg, 0); - case IB_USER_MAD_UNREGISTER_AGENT: - return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg); - case IB_USER_MAD_ENABLE_PKEY: - return ib_umad_enable_pkey(filp->private_data); - case IB_USER_MAD_REGISTER_AGENT2: - return ib_umad_reg_agent2(filp->private_data, (void __user *) arg); - default: - return -ENOIOCTLCMD; - } -} - -#ifdef CONFIG_COMPAT -static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - switch (cmd) { - case IB_USER_MAD_REGISTER_AGENT: - return ib_umad_reg_agent(filp->private_data, compat_ptr(arg), 1); - case IB_USER_MAD_UNREGISTER_AGENT: - return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg)); - case IB_USER_MAD_ENABLE_PKEY: - return ib_umad_enable_pkey(filp->private_data); - case IB_USER_MAD_REGISTER_AGENT2: - return ib_umad_reg_agent2(filp->private_data, compat_ptr(arg)); - default: - return -ENOIOCTLCMD; - } -} -#endif - -/* - * ib_umad_open() does not need the BKL: - * - * - the ib_umad_port structures are properly reference counted, and - * everything else is purely local to the file being created, so - * races against other open calls are not a problem; - * - the ioctl method does not affect any global state outside of the - * file structure being operated on; - */ -static int ib_umad_open(struct inode *inode, struct file *filp) -{ - struct ib_umad_port *port; - struct ib_umad_file *file; - int ret = -ENXIO; - - port = container_of(inode->i_cdev->si_drv1, struct ib_umad_port, cdev); - - mutex_lock(&port->file_mutex); - - if (!port->ib_dev) - goto out; - - ret = -ENOMEM; - file = kzalloc(sizeof *file, GFP_KERNEL); - if (!file) - goto out; - - mutex_init(&file->mutex); - spin_lock_init(&file->send_lock); - INIT_LIST_HEAD(&file->recv_list); - INIT_LIST_HEAD(&file->send_list); - init_waitqueue_head(&file->recv_wait); - - file->port = port; - file->filp = filp; - filp->private_data = file; - - list_add_tail(&file->port_list, 
&port->file_list); - - ret = nonseekable_open(inode, filp); - if (ret) { - list_del(&file->port_list); - kfree(file); - goto out; - } - - kobject_get(&port->umad_dev->kobj); - -out: - mutex_unlock(&port->file_mutex); - return ret; -} - -static int ib_umad_close(struct inode *inode, struct file *filp) -{ - struct ib_umad_file *file = filp->private_data; - struct ib_umad_device *dev = file->port->umad_dev; - struct ib_umad_packet *packet, *tmp; - int already_dead; - int i; - - mutex_lock(&file->port->file_mutex); - mutex_lock(&file->mutex); - - already_dead = file->agents_dead; - file->agents_dead = 1; - - list_for_each_entry_safe(packet, tmp, &file->recv_list, list) { - if (packet->recv_wc) - ib_free_recv_mad(packet->recv_wc); - kfree(packet); - } - - list_del(&file->port_list); - - mutex_unlock(&file->mutex); - - if (!already_dead) - for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) - if (file->agent[i]) - ib_unregister_mad_agent(file->agent[i]); - - mutex_unlock(&file->port->file_mutex); - - kfree(file); - kobject_put(&dev->kobj); - - return 0; -} - -static const struct file_operations umad_fops = { - .owner = THIS_MODULE, - .read = ib_umad_read, - .write = ib_umad_write, - .poll = ib_umad_poll, - .unlocked_ioctl = ib_umad_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ib_umad_compat_ioctl, -#endif - .open = ib_umad_open, - .release = ib_umad_close, - .llseek = no_llseek, -}; - -static int ib_umad_sm_open(struct inode *inode, struct file *filp) -{ - struct ib_umad_port *port; - struct ib_port_modify props = { - .set_port_cap_mask = IB_PORT_SM - }; - int ret; - - port = container_of(inode->i_cdev->si_drv1, struct ib_umad_port, sm_cdev); - - if (filp->f_flags & O_NONBLOCK) { - if (down_trylock(&port->sm_sem)) { - ret = -EAGAIN; - goto fail; - } - } else { - if (down_interruptible(&port->sm_sem)) { - ret = -ERESTARTSYS; - goto fail; - } - } - - ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); - if (ret) - goto err_up_sem; - - filp->private_data = port; - - ret = nonseekable_open(inode, filp); - if (ret) - goto err_clr_sm_cap; - - kobject_get(&port->umad_dev->kobj); - - return 0; - -err_clr_sm_cap: - swap(props.set_port_cap_mask, props.clr_port_cap_mask); - ib_modify_port(port->ib_dev, port->port_num, 0, &props); - -err_up_sem: - up(&port->sm_sem); - -fail: - return ret; -} - -static int ib_umad_sm_close(struct inode *inode, struct file *filp) -{ - struct ib_umad_port *port = filp->private_data; - struct ib_port_modify props = { - .clr_port_cap_mask = IB_PORT_SM - }; - int ret = 0; - - mutex_lock(&port->file_mutex); - if (port->ib_dev) - ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); - mutex_unlock(&port->file_mutex); - - up(&port->sm_sem); - - kobject_put(&port->umad_dev->kobj); - - return ret; -} - -static const struct file_operations umad_sm_fops = { - .owner = THIS_MODULE, - .open = ib_umad_sm_open, - .release = ib_umad_sm_close, - .llseek = no_llseek, -}; - -static struct ib_client umad_client = { - .name = "umad", - .add = ib_umad_add_one, - .remove = ib_umad_remove_one -}; - -static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct ib_umad_port *port = dev_get_drvdata(dev); - - if (!port) - return -ENODEV; - - return sprintf(buf, "%s\n", port->ib_dev->name); -} -static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); - -static ssize_t show_port(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct ib_umad_port *port = dev_get_drvdata(dev); - - if (!port) - return -ENODEV; - - return sprintf(buf, 
"%d\n", port->port_num); -} -static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); - -static CLASS_ATTR_STRING(abi_version, S_IRUGO, - __stringify(IB_USER_MAD_ABI_VERSION)); - -static dev_t overflow_maj; -static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS); -static int find_overflow_devnum(struct ib_device *device) -{ - int ret; - - if (!overflow_maj) { - ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2, - "infiniband_mad"); - if (ret) { - dev_err(&device->dev, - "couldn't register dynamic device number\n"); - return ret; - } - } - - ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS); - if (ret >= IB_UMAD_MAX_PORTS) - return -1; - - return ret; -} - -static int ib_umad_init_port(struct ib_device *device, int port_num, - struct ib_umad_device *umad_dev, - struct ib_umad_port *port) -{ - int devnum; - dev_t base; - - spin_lock(&port_lock); - devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); - if (devnum >= IB_UMAD_MAX_PORTS) { - spin_unlock(&port_lock); - devnum = find_overflow_devnum(device); - if (devnum < 0) - return -1; - - spin_lock(&port_lock); - port->dev_num = devnum + IB_UMAD_MAX_PORTS; - base = devnum + overflow_maj; - set_bit(devnum, overflow_map); - } else { - port->dev_num = devnum; - base = devnum + base_dev; - set_bit(devnum, dev_map); - } - spin_unlock(&port_lock); - - port->ib_dev = device; - port->port_num = port_num; - sema_init(&port->sm_sem, 1); - mutex_init(&port->file_mutex); - INIT_LIST_HEAD(&port->file_list); - - cdev_init(&port->cdev, &umad_fops); - port->cdev.owner = THIS_MODULE; - port->cdev.kobj.parent = &umad_dev->kobj; - kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); - if (cdev_add(&port->cdev, base, 1)) - goto err_cdev; - - port->dev = device_create(umad_class, device->dma_device, - port->cdev.dev, port, - "umad%d", port->dev_num); - if (IS_ERR(port->dev)) - goto err_cdev; - - if (device_create_file(port->dev, &dev_attr_ibdev)) - goto err_dev; - if (device_create_file(port->dev, &dev_attr_port)) - goto err_dev; - - base += IB_UMAD_MAX_PORTS; - cdev_init(&port->sm_cdev, &umad_sm_fops); - port->sm_cdev.owner = THIS_MODULE; - port->sm_cdev.kobj.parent = &umad_dev->kobj; - kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); - if (cdev_add(&port->sm_cdev, base, 1)) - goto err_sm_cdev; - - port->sm_dev = device_create(umad_class, device->dma_device, - port->sm_cdev.dev, port, - "issm%d", port->dev_num); - if (IS_ERR(port->sm_dev)) - goto err_sm_cdev; - - if (device_create_file(port->sm_dev, &dev_attr_ibdev)) - goto err_sm_dev; - if (device_create_file(port->sm_dev, &dev_attr_port)) - goto err_sm_dev; - - return 0; - -err_sm_dev: - device_destroy(umad_class, port->sm_cdev.dev); - -err_sm_cdev: - cdev_del(&port->sm_cdev); - -err_dev: - device_destroy(umad_class, port->cdev.dev); - -err_cdev: - cdev_del(&port->cdev); - if (port->dev_num < IB_UMAD_MAX_PORTS) - clear_bit(devnum, dev_map); - else - clear_bit(devnum, overflow_map); - - return -1; -} - -static void ib_umad_kill_port(struct ib_umad_port *port) -{ - struct ib_umad_file *file; - int id; - - dev_set_drvdata(port->dev, NULL); - dev_set_drvdata(port->sm_dev, NULL); - - device_destroy(umad_class, port->cdev.dev); - device_destroy(umad_class, port->sm_cdev.dev); - - cdev_del(&port->cdev); - cdev_del(&port->sm_cdev); - - mutex_lock(&port->file_mutex); - - port->ib_dev = NULL; - - list_for_each_entry(file, &port->file_list, port_list) { - mutex_lock(&file->mutex); - file->agents_dead = 1; - mutex_unlock(&file->mutex); - - for (id = 0; id < 
IB_UMAD_MAX_AGENTS; ++id) - if (file->agent[id]) - ib_unregister_mad_agent(file->agent[id]); - } - - mutex_unlock(&port->file_mutex); - - if (port->dev_num < IB_UMAD_MAX_PORTS) - clear_bit(port->dev_num, dev_map); - else - clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map); -} - -static void ib_umad_add_one(struct ib_device *device) -{ - struct ib_umad_device *umad_dev; - int s, e, i; - int count = 0; - - s = rdma_start_port(device); - e = rdma_end_port(device); - - umad_dev = kzalloc(sizeof *umad_dev + - (e - s + 1) * sizeof (struct ib_umad_port), - GFP_KERNEL); - if (!umad_dev) - return; - - kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); - - for (i = s; i <= e; ++i) { - if (!rdma_cap_ib_mad(device, i)) - continue; - - umad_dev->port[i - s].umad_dev = umad_dev; - - if (ib_umad_init_port(device, i, umad_dev, - &umad_dev->port[i - s])) - goto err; - - count++; - } - - if (!count) - goto free; - - ib_set_client_data(device, &umad_client, umad_dev); - - return; - -err: - while (--i >= s) { - if (!rdma_cap_ib_mad(device, i)) - continue; - - ib_umad_kill_port(&umad_dev->port[i - s]); - } -free: - kobject_put(&umad_dev->kobj); -} - -static void ib_umad_remove_one(struct ib_device *device, void *client_data) -{ - struct ib_umad_device *umad_dev = client_data; - int i; - - if (!umad_dev) - return; - - for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) { - if (rdma_cap_ib_mad(device, i + rdma_start_port(device))) - ib_umad_kill_port(&umad_dev->port[i]); - } - - kobject_put(&umad_dev->kobj); -} - -static char *umad_devnode(struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); -} - -static int __init ib_umad_init(void) -{ - int ret; - - ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, - "infiniband_mad"); - if (ret) { - pr_err("couldn't register device number\n"); - goto out; - } - - umad_class = class_create(THIS_MODULE, "infiniband_mad"); - if (IS_ERR(umad_class)) { - ret = PTR_ERR(umad_class); - pr_err("couldn't create class infiniband_mad\n"); - goto out_chrdev; - } - - umad_class->devnode = umad_devnode; - - ret = class_create_file(umad_class, &class_attr_abi_version.attr); - if (ret) { - pr_err("couldn't create abi_version attribute\n"); - goto out_class; - } - - ret = ib_register_client(&umad_client); - if (ret) { - pr_err("couldn't register ib_umad client\n"); - goto out_class; - } - - return 0; - -out_class: - class_destroy(umad_class); - -out_chrdev: - unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); - -out: - return ret; -} - -static void __exit ib_umad_cleanup(void) -{ - ib_unregister_client(&umad_client); - class_destroy(umad_class); - unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); - if (overflow_maj) - unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2); -} - -module_init_order(ib_umad_init, SI_ORDER_THIRD); -module_exit(ib_umad_cleanup); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/user_mad.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/cma.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/cma.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/cma.c (nonexistent) @@ -1,4307 +0,0 @@ -/* - * Copyright (c) 2005 Voltaire Inc. All rights reserved. 
- * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. - * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#define LINUXKPI_PARAM_PREFIX ibcore_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "core_priv.h" - -MODULE_AUTHOR("Sean Hefty"); -MODULE_DESCRIPTION("Generic RDMA CM Agent"); -MODULE_LICENSE("Dual BSD/GPL"); - -#define CMA_CM_RESPONSE_TIMEOUT 20 -#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000 -#define CMA_MAX_CM_RETRIES 15 -#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) -#define CMA_IBOE_PACKET_LIFETIME 18 - -static const char * const cma_events[] = { - [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", - [RDMA_CM_EVENT_ADDR_ERROR] = "address error", - [RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ", - [RDMA_CM_EVENT_ROUTE_ERROR] = "route error", - [RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request", - [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response", - [RDMA_CM_EVENT_CONNECT_ERROR] = "connect error", - [RDMA_CM_EVENT_UNREACHABLE] = "unreachable", - [RDMA_CM_EVENT_REJECTED] = "rejected", - [RDMA_CM_EVENT_ESTABLISHED] = "established", - [RDMA_CM_EVENT_DISCONNECTED] = "disconnected", - [RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal", - [RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join", - [RDMA_CM_EVENT_MULTICAST_ERROR] = "multicast error", - [RDMA_CM_EVENT_ADDR_CHANGE] = "address change", - [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", -}; - -const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) -{ - size_t index = event; - - return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ? 
- cma_events[index] : "unrecognized event"; -} -EXPORT_SYMBOL(rdma_event_msg); - -static void cma_add_one(struct ib_device *device); -static void cma_remove_one(struct ib_device *device, void *client_data); - -static struct ib_client cma_client = { - .name = "cma", - .add = cma_add_one, - .remove = cma_remove_one -}; - -static struct ib_sa_client sa_client; -static struct rdma_addr_client addr_client; -static LIST_HEAD(dev_list); -static LIST_HEAD(listen_any_list); -static DEFINE_MUTEX(lock); -static struct workqueue_struct *cma_wq; - -struct cma_pernet { - struct idr tcp_ps; - struct idr udp_ps; - struct idr ipoib_ps; - struct idr ib_ps; -}; - -VNET_DEFINE(struct cma_pernet, cma_pernet); - -static struct cma_pernet *cma_pernet_ptr(struct vnet *vnet) -{ - struct cma_pernet *retval; - - CURVNET_SET_QUIET(vnet); - retval = &VNET(cma_pernet); - CURVNET_RESTORE(); - - return (retval); -} - -static struct idr *cma_pernet_idr(struct vnet *net, enum rdma_port_space ps) -{ - struct cma_pernet *pernet = cma_pernet_ptr(net); - - switch (ps) { - case RDMA_PS_TCP: - return &pernet->tcp_ps; - case RDMA_PS_UDP: - return &pernet->udp_ps; - case RDMA_PS_IPOIB: - return &pernet->ipoib_ps; - case RDMA_PS_IB: - return &pernet->ib_ps; - default: - return NULL; - } -} - -struct cma_device { - struct list_head list; - struct ib_device *device; - struct completion comp; - atomic_t refcount; - struct list_head id_list; - struct sysctl_ctx_list sysctl_ctx; - enum ib_gid_type *default_gid_type; -}; - -struct rdma_bind_list { - enum rdma_port_space ps; - struct hlist_head owners; - unsigned short port; -}; - -struct class_port_info_context { - struct ib_class_port_info *class_port_info; - struct ib_device *device; - struct completion done; - struct ib_sa_query *sa_query; - u8 port_num; -}; - -static int cma_ps_alloc(struct vnet *vnet, enum rdma_port_space ps, - struct rdma_bind_list *bind_list, int snum) -{ - struct idr *idr = cma_pernet_idr(vnet, ps); - - return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); -} - -static struct rdma_bind_list *cma_ps_find(struct vnet *net, - enum rdma_port_space ps, int snum) -{ - struct idr *idr = cma_pernet_idr(net, ps); - - return idr_find(idr, snum); -} - -static void cma_ps_remove(struct vnet *net, enum rdma_port_space ps, int snum) -{ - struct idr *idr = cma_pernet_idr(net, ps); - - idr_remove(idr, snum); -} - -enum { - CMA_OPTION_AFONLY, -}; - -void cma_ref_dev(struct cma_device *cma_dev) -{ - atomic_inc(&cma_dev->refcount); -} - -struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, - void *cookie) -{ - struct cma_device *cma_dev; - struct cma_device *found_cma_dev = NULL; - - mutex_lock(&lock); - - list_for_each_entry(cma_dev, &dev_list, list) - if (filter(cma_dev->device, cookie)) { - found_cma_dev = cma_dev; - break; - } - - if (found_cma_dev) - cma_ref_dev(found_cma_dev); - mutex_unlock(&lock); - return found_cma_dev; -} - -int cma_get_default_gid_type(struct cma_device *cma_dev, - unsigned int port) -{ - if (port < rdma_start_port(cma_dev->device) || - port > rdma_end_port(cma_dev->device)) - return -EINVAL; - - return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)]; -} - -int cma_set_default_gid_type(struct cma_device *cma_dev, - unsigned int port, - enum ib_gid_type default_gid_type) -{ - unsigned long supported_gids; - - if (port < rdma_start_port(cma_dev->device) || - port > rdma_end_port(cma_dev->device)) - return -EINVAL; - - supported_gids = roce_gid_type_mask_support(cma_dev->device, port); - - if (!(supported_gids & 
1 << default_gid_type)) - return -EINVAL; - - cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] = - default_gid_type; - - return 0; -} - -struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) -{ - return cma_dev->device; -} - -/* - * Device removal can occur at anytime, so we need extra handling to - * serialize notifying the user of device removal with other callbacks. - * We do this by disabling removal notification while a callback is in process, - * and reporting it after the callback completes. - */ -struct rdma_id_private { - struct rdma_cm_id id; - - struct rdma_bind_list *bind_list; - struct hlist_node node; - struct list_head list; /* listen_any_list or cma_device.list */ - struct list_head listen_list; /* per device listens */ - struct cma_device *cma_dev; - struct list_head mc_list; - - int internal_id; - enum rdma_cm_state state; - spinlock_t lock; - struct mutex qp_mutex; - - struct completion comp; - atomic_t refcount; - struct mutex handler_mutex; - - int backlog; - int timeout_ms; - struct ib_sa_query *query; - int query_id; - union { - struct ib_cm_id *ib; - struct iw_cm_id *iw; - } cm_id; - - u32 seq_num; - u32 qkey; - u32 qp_num; - pid_t owner; - u32 options; - u8 srq; - u8 tos; - u8 reuseaddr; - u8 afonly; - enum ib_gid_type gid_type; -}; - -struct cma_multicast { - struct rdma_id_private *id_priv; - union { - struct ib_sa_multicast *ib; - } multicast; - struct list_head list; - void *context; - struct sockaddr_storage addr; - struct kref mcref; - bool igmp_joined; - u8 join_state; -}; - -struct cma_work { - struct work_struct work; - struct rdma_id_private *id; - enum rdma_cm_state old_state; - enum rdma_cm_state new_state; - struct rdma_cm_event event; -}; - -struct cma_ndev_work { - struct work_struct work; - struct rdma_id_private *id; - struct rdma_cm_event event; -}; - -struct iboe_mcast_work { - struct work_struct work; - struct rdma_id_private *id; - struct cma_multicast *mc; -}; - -union cma_ip_addr { - struct in6_addr ip6; - struct { - __be32 pad[3]; - __be32 addr; - } ip4; -}; - -struct cma_hdr { - u8 cma_version; - u8 ip_version; /* IP version: 7:4 */ - __be16 port; - union cma_ip_addr src_addr; - union cma_ip_addr dst_addr; -}; - -#define CMA_VERSION 0x00 - -struct cma_req_info { - struct ib_device *device; - int port; - union ib_gid local_gid; - __be64 service_id; - u16 pkey; - bool has_gid:1; -}; - -static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&id_priv->lock, flags); - ret = (id_priv->state == comp); - spin_unlock_irqrestore(&id_priv->lock, flags); - return ret; -} - -static int cma_comp_exch(struct rdma_id_private *id_priv, - enum rdma_cm_state comp, enum rdma_cm_state exch) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&id_priv->lock, flags); - if ((ret = (id_priv->state == comp))) - id_priv->state = exch; - spin_unlock_irqrestore(&id_priv->lock, flags); - return ret; -} - -static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, - enum rdma_cm_state exch) -{ - unsigned long flags; - enum rdma_cm_state old; - - spin_lock_irqsave(&id_priv->lock, flags); - old = id_priv->state; - id_priv->state = exch; - spin_unlock_irqrestore(&id_priv->lock, flags); - return old; -} - -static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) -{ - return hdr->ip_version >> 4; -} - -static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) -{ - hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); -} - -static void 
_cma_attach_to_dev(struct rdma_id_private *id_priv, - struct cma_device *cma_dev) -{ - cma_ref_dev(cma_dev); - id_priv->cma_dev = cma_dev; - id_priv->gid_type = 0; - id_priv->id.device = cma_dev->device; - id_priv->id.route.addr.dev_addr.transport = - rdma_node_get_transport(cma_dev->device->node_type); - list_add_tail(&id_priv->list, &cma_dev->id_list); -} - -static void cma_attach_to_dev(struct rdma_id_private *id_priv, - struct cma_device *cma_dev) -{ - _cma_attach_to_dev(id_priv, cma_dev); - id_priv->gid_type = - cma_dev->default_gid_type[id_priv->id.port_num - - rdma_start_port(cma_dev->device)]; -} - -void cma_deref_dev(struct cma_device *cma_dev) -{ - if (atomic_dec_and_test(&cma_dev->refcount)) - complete(&cma_dev->comp); -} - -static inline void release_mc(struct kref *kref) -{ - struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); - - kfree(mc->multicast.ib); - kfree(mc); -} - -static void cma_release_dev(struct rdma_id_private *id_priv) -{ - mutex_lock(&lock); - list_del(&id_priv->list); - cma_deref_dev(id_priv->cma_dev); - id_priv->cma_dev = NULL; - mutex_unlock(&lock); -} - -static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) -{ - return (struct sockaddr *) &id_priv->id.route.addr.src_addr; -} - -static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) -{ - return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; -} - -static inline unsigned short cma_family(struct rdma_id_private *id_priv) -{ - return id_priv->id.route.addr.src_addr.ss_family; -} - -static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) -{ - struct ib_sa_mcmember_rec rec; - int ret = 0; - - if (id_priv->qkey) { - if (qkey && id_priv->qkey != qkey) - return -EINVAL; - return 0; - } - - if (qkey) { - id_priv->qkey = qkey; - return 0; - } - - switch (id_priv->id.ps) { - case RDMA_PS_UDP: - case RDMA_PS_IB: - id_priv->qkey = RDMA_UDP_QKEY; - break; - case RDMA_PS_IPOIB: - ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid); - ret = ib_sa_get_mcmember_rec(id_priv->id.device, - id_priv->id.port_num, &rec.mgid, - &rec); - if (!ret) - id_priv->qkey = be32_to_cpu(rec.qkey); - break; - default: - break; - } - return ret; -} - -static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) -{ - dev_addr->dev_type = ARPHRD_INFINIBAND; - rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr); - ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey)); -} - -static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) -{ - int ret; - - if (addr->sa_family != AF_IB) { - ret = rdma_translate_ip(addr, dev_addr, NULL); - } else { - cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); - ret = 0; - } - - return ret; -} - -static inline int cma_validate_port(struct ib_device *device, u8 port, - enum ib_gid_type gid_type, - union ib_gid *gid, int dev_type, - struct vnet *net, - int bound_if_index) -{ - int ret = -ENODEV; - struct net_device *ndev = NULL; - - if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) - return ret; - - if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) - return ret; - - if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { - ndev = dev_get_by_index(net, bound_if_index); - if (ndev && ndev->if_flags & IFF_LOOPBACK) { - pr_info("detected loopback device\n"); - dev_put(ndev); - - if (!device->get_netdev) - return -EOPNOTSUPP; - - ndev = device->get_netdev(device, port); - if (!ndev) - return -ENODEV; - } - } else { 
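- /*
-  * Note, roughly: ports that are not RoCE/Ethernet only ever carry
-  * IB-typed GIDs, so the cached-GID lookup below is pinned to
-  * IB_GID_TYPE_IB regardless of the gid_type the caller passed in.
-  */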
- gid_type = IB_GID_TYPE_IB; - } - - ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, - ndev, NULL); - - if (ndev) - dev_put(ndev); - - return ret; -} - -static int cma_acquire_dev(struct rdma_id_private *id_priv, - struct rdma_id_private *listen_id_priv) -{ - struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; - struct cma_device *cma_dev; - union ib_gid gid, iboe_gid, *gidp; - int ret = -ENODEV; - u8 port; - - if (dev_addr->dev_type != ARPHRD_INFINIBAND && - id_priv->id.ps == RDMA_PS_IPOIB) - return -EINVAL; - - mutex_lock(&lock); - rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, - &iboe_gid); - - memcpy(&gid, dev_addr->src_dev_addr + - rdma_addr_gid_offset(dev_addr), sizeof gid); - - if (listen_id_priv) { - cma_dev = listen_id_priv->cma_dev; - port = listen_id_priv->id.port_num; - gidp = rdma_protocol_roce(cma_dev->device, port) ? - &iboe_gid : &gid; - - ret = cma_validate_port(cma_dev->device, port, - rdma_protocol_ib(cma_dev->device, port) ? - IB_GID_TYPE_IB : - listen_id_priv->gid_type, gidp, - dev_addr->dev_type, - dev_addr->net, - dev_addr->bound_dev_if); - if (!ret) { - id_priv->id.port_num = port; - goto out; - } - } - - list_for_each_entry(cma_dev, &dev_list, list) { - for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { - if (listen_id_priv && - listen_id_priv->cma_dev == cma_dev && - listen_id_priv->id.port_num == port) - continue; - - gidp = rdma_protocol_roce(cma_dev->device, port) ? - &iboe_gid : &gid; - - ret = cma_validate_port(cma_dev->device, port, - rdma_protocol_ib(cma_dev->device, port) ? - IB_GID_TYPE_IB : - cma_dev->default_gid_type[port - 1], - gidp, dev_addr->dev_type, - dev_addr->net, - dev_addr->bound_dev_if); - if (!ret) { - id_priv->id.port_num = port; - goto out; - } - } - } - -out: - if (!ret) - cma_attach_to_dev(id_priv, cma_dev); - - mutex_unlock(&lock); - return ret; -} - -/* - * Select the source IB device and address to reach the destination IB address. 
- */ -static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) -{ - struct cma_device *cma_dev, *cur_dev; - struct sockaddr_ib *addr; - union ib_gid gid, sgid, *dgid; - u16 pkey, index; - u8 p; - int i; - - cma_dev = NULL; - addr = (struct sockaddr_ib *) cma_dst_addr(id_priv); - dgid = (union ib_gid *) &addr->sib_addr; - pkey = ntohs(addr->sib_pkey); - - list_for_each_entry(cur_dev, &dev_list, list) { - for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { - if (!rdma_cap_af_ib(cur_dev->device, p)) - continue; - - if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) - continue; - - for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, - &gid, NULL); - i++) { - if (!memcmp(&gid, dgid, sizeof(gid))) { - cma_dev = cur_dev; - sgid = gid; - id_priv->id.port_num = p; - goto found; - } - - if (!cma_dev && (gid.global.subnet_prefix == - dgid->global.subnet_prefix)) { - cma_dev = cur_dev; - sgid = gid; - id_priv->id.port_num = p; - } - } - } - } - - if (!cma_dev) - return -ENODEV; - -found: - cma_attach_to_dev(id_priv, cma_dev); - addr = (struct sockaddr_ib *) cma_src_addr(id_priv); - memcpy(&addr->sib_addr, &sgid, sizeof sgid); - cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); - return 0; -} - -static void cma_deref_id(struct rdma_id_private *id_priv) -{ - if (atomic_dec_and_test(&id_priv->refcount)) - complete(&id_priv->comp); -} - -struct rdma_cm_id *rdma_create_id(struct vnet *net, - rdma_cm_event_handler event_handler, - void *context, enum rdma_port_space ps, - enum ib_qp_type qp_type) -{ - struct rdma_id_private *id_priv; - - id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); - if (!id_priv) - return ERR_PTR(-ENOMEM); - - id_priv->owner = task_pid_nr(current); - id_priv->state = RDMA_CM_IDLE; - id_priv->id.context = context; - id_priv->id.event_handler = event_handler; - id_priv->id.ps = ps; - id_priv->id.qp_type = qp_type; - spin_lock_init(&id_priv->lock); - mutex_init(&id_priv->qp_mutex); - init_completion(&id_priv->comp); - atomic_set(&id_priv->refcount, 1); - mutex_init(&id_priv->handler_mutex); - INIT_LIST_HEAD(&id_priv->listen_list); - INIT_LIST_HEAD(&id_priv->mc_list); - get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); - id_priv->id.route.addr.dev_addr.net = TD_TO_VNET(curthread); - - return &id_priv->id; -} -EXPORT_SYMBOL(rdma_create_id); - -static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) -{ - struct ib_qp_attr qp_attr; - int qp_attr_mask, ret; - - qp_attr.qp_state = IB_QPS_INIT; - ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); - if (ret) - return ret; - - ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); - if (ret) - return ret; - - qp_attr.qp_state = IB_QPS_RTR; - ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); - if (ret) - return ret; - - qp_attr.qp_state = IB_QPS_RTS; - qp_attr.sq_psn = 0; - ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); - - return ret; -} - -static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) -{ - struct ib_qp_attr qp_attr; - int qp_attr_mask, ret; - - qp_attr.qp_state = IB_QPS_INIT; - ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); - if (ret) - return ret; - - return ib_modify_qp(qp, &qp_attr, qp_attr_mask); -} - -int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr) -{ - struct rdma_id_private *id_priv; - struct ib_qp *qp; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - if (id->device != pd->device) - return -EINVAL; - - qp_init_attr->port_num = 
id->port_num; - qp = ib_create_qp(pd, qp_init_attr); - if (IS_ERR(qp)) - return PTR_ERR(qp); - - if (id->qp_type == IB_QPT_UD) - ret = cma_init_ud_qp(id_priv, qp); - else - ret = cma_init_conn_qp(id_priv, qp); - if (ret) - goto err; - - id->qp = qp; - id_priv->qp_num = qp->qp_num; - id_priv->srq = (qp->srq != NULL); - return 0; -err: - ib_destroy_qp(qp); - return ret; -} -EXPORT_SYMBOL(rdma_create_qp); - -void rdma_destroy_qp(struct rdma_cm_id *id) -{ - struct rdma_id_private *id_priv; - - id_priv = container_of(id, struct rdma_id_private, id); - mutex_lock(&id_priv->qp_mutex); - ib_destroy_qp(id_priv->id.qp); - id_priv->id.qp = NULL; - mutex_unlock(&id_priv->qp_mutex); -} -EXPORT_SYMBOL(rdma_destroy_qp); - -static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, - struct rdma_conn_param *conn_param) -{ - struct ib_qp_attr qp_attr; - int qp_attr_mask, ret; - union ib_gid sgid; - - mutex_lock(&id_priv->qp_mutex); - if (!id_priv->id.qp) { - ret = 0; - goto out; - } - - /* Need to update QP attributes from default values. */ - qp_attr.qp_state = IB_QPS_INIT; - ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); - if (ret) - goto out; - - ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); - if (ret) - goto out; - - qp_attr.qp_state = IB_QPS_RTR; - ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); - if (ret) - goto out; - - ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, - qp_attr.ah_attr.grh.sgid_index, &sgid, NULL); - if (ret) - goto out; - - BUG_ON(id_priv->cma_dev->device != id_priv->id.device); - - if (conn_param) - qp_attr.max_dest_rd_atomic = conn_param->responder_resources; - ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); -out: - mutex_unlock(&id_priv->qp_mutex); - return ret; -} - -static int cma_modify_qp_rts(struct rdma_id_private *id_priv, - struct rdma_conn_param *conn_param) -{ - struct ib_qp_attr qp_attr; - int qp_attr_mask, ret; - - mutex_lock(&id_priv->qp_mutex); - if (!id_priv->id.qp) { - ret = 0; - goto out; - } - - qp_attr.qp_state = IB_QPS_RTS; - ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); - if (ret) - goto out; - - if (conn_param) - qp_attr.max_rd_atomic = conn_param->initiator_depth; - ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); -out: - mutex_unlock(&id_priv->qp_mutex); - return ret; -} - -static int cma_modify_qp_err(struct rdma_id_private *id_priv) -{ - struct ib_qp_attr qp_attr; - int ret; - - mutex_lock(&id_priv->qp_mutex); - if (!id_priv->id.qp) { - ret = 0; - goto out; - } - - qp_attr.qp_state = IB_QPS_ERR; - ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); -out: - mutex_unlock(&id_priv->qp_mutex); - return ret; -} - -static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, - struct ib_qp_attr *qp_attr, int *qp_attr_mask) -{ - struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; - int ret; - u16 pkey; - - if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) - pkey = 0xffff; - else - pkey = ib_addr_get_pkey(dev_addr); - - ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, - pkey, &qp_attr->pkey_index); - if (ret) - return ret; - - qp_attr->port_num = id_priv->id.port_num; - *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; - - if (id_priv->id.qp_type == IB_QPT_UD) { - ret = cma_set_qkey(id_priv, 0); - if (ret) - return ret; - - qp_attr->qkey = id_priv->qkey; - *qp_attr_mask |= IB_QP_QKEY; - } else { - qp_attr->qp_access_flags = 0; - *qp_attr_mask |= IB_QP_ACCESS_FLAGS; - } - return 0; -} - -int 
rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, - int *qp_attr_mask) -{ - struct rdma_id_private *id_priv; - int ret = 0; - - id_priv = container_of(id, struct rdma_id_private, id); - if (rdma_cap_ib_cm(id->device, id->port_num)) { - if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) - ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); - else - ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, - qp_attr_mask); - - if (qp_attr->qp_state == IB_QPS_RTR) - qp_attr->rq_psn = id_priv->seq_num; - } else if (rdma_cap_iw_cm(id->device, id->port_num)) { - if (!id_priv->cm_id.iw) { - qp_attr->qp_access_flags = 0; - *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; - } else - ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, - qp_attr_mask); - } else - ret = -ENOSYS; - - return ret; -} -EXPORT_SYMBOL(rdma_init_qp_attr); - -static inline int cma_zero_addr(struct sockaddr *addr) -{ - switch (addr->sa_family) { - case AF_INET: - return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); - case AF_INET6: - return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); - case AF_IB: - return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); - default: - return 0; - } -} - -static inline int cma_loopback_addr(struct sockaddr *addr) -{ - switch (addr->sa_family) { - case AF_INET: - return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); - case AF_INET6: - return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); - case AF_IB: - return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); - default: - return 0; - } -} - -static inline int cma_any_addr(struct sockaddr *addr) -{ - return cma_zero_addr(addr) || cma_loopback_addr(addr); -} - -static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) -{ - if (src->sa_family != dst->sa_family) - return -1; - - switch (src->sa_family) { - case AF_INET: - return ((struct sockaddr_in *) src)->sin_addr.s_addr != - ((struct sockaddr_in *) dst)->sin_addr.s_addr; - case AF_INET6: - return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, - &((struct sockaddr_in6 *) dst)->sin6_addr); - default: - return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, - &((struct sockaddr_ib *) dst)->sib_addr); - } -} - -static __be16 cma_port(struct sockaddr *addr) -{ - struct sockaddr_ib *sib; - - switch (addr->sa_family) { - case AF_INET: - return ((struct sockaddr_in *) addr)->sin_port; - case AF_INET6: - return ((struct sockaddr_in6 *) addr)->sin6_port; - case AF_IB: - sib = (struct sockaddr_ib *) addr; - return htons((u16) (be64_to_cpu(sib->sib_sid) & - be64_to_cpu(sib->sib_sid_mask))); - default: - return 0; - } -} - -static inline int cma_any_port(struct sockaddr *addr) -{ - return !cma_port(addr); -} - -static void cma_save_ib_info(struct sockaddr *src_addr, - struct sockaddr *dst_addr, - struct rdma_cm_id *listen_id, - struct ib_sa_path_rec *path) -{ - struct sockaddr_ib *listen_ib, *ib; - - listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; - if (src_addr) { - ib = (struct sockaddr_ib *)src_addr; - ib->sib_family = AF_IB; - if (path) { - ib->sib_pkey = path->pkey; - ib->sib_flowinfo = path->flow_label; - memcpy(&ib->sib_addr, &path->sgid, 16); - ib->sib_sid = path->service_id; - ib->sib_scope_id = 0; - } else { - ib->sib_pkey = listen_ib->sib_pkey; - ib->sib_flowinfo = listen_ib->sib_flowinfo; - ib->sib_addr = listen_ib->sib_addr; - ib->sib_sid = listen_ib->sib_sid; - ib->sib_scope_id = listen_ib->sib_scope_id; - } - ib->sib_sid_mask = 
cpu_to_be64(0xffffffffffffffffULL); - } - if (dst_addr) { - ib = (struct sockaddr_ib *)dst_addr; - ib->sib_family = AF_IB; - if (path) { - ib->sib_pkey = path->pkey; - ib->sib_flowinfo = path->flow_label; - memcpy(&ib->sib_addr, &path->dgid, 16); - } - } -} - -static void cma_save_ip4_info(struct sockaddr_in *src_addr, - struct sockaddr_in *dst_addr, - struct cma_hdr *hdr, - __be16 local_port) -{ - if (src_addr) { - *src_addr = (struct sockaddr_in) { - .sin_family = AF_INET, - .sin_addr.s_addr = hdr->dst_addr.ip4.addr, - .sin_port = local_port, - }; - } - - if (dst_addr) { - *dst_addr = (struct sockaddr_in) { - .sin_family = AF_INET, - .sin_addr.s_addr = hdr->src_addr.ip4.addr, - .sin_port = hdr->port, - }; - } -} - -static void cma_save_ip6_info(struct sockaddr_in6 *src_addr, - struct sockaddr_in6 *dst_addr, - struct cma_hdr *hdr, - __be16 local_port) -{ - if (src_addr) { - *src_addr = (struct sockaddr_in6) { - .sin6_family = AF_INET6, - .sin6_addr = hdr->dst_addr.ip6, - .sin6_port = local_port, - }; - } - - if (dst_addr) { - *dst_addr = (struct sockaddr_in6) { - .sin6_family = AF_INET6, - .sin6_addr = hdr->src_addr.ip6, - .sin6_port = hdr->port, - }; - } -} - -static u16 cma_port_from_service_id(__be64 service_id) -{ - return (u16)be64_to_cpu(service_id); -} - -static int cma_save_ip_info(struct sockaddr *src_addr, - struct sockaddr *dst_addr, - struct ib_cm_event *ib_event, - __be64 service_id) -{ - struct cma_hdr *hdr; - __be16 port; - - hdr = ib_event->private_data; - if (hdr->cma_version != CMA_VERSION) - return -EINVAL; - - port = htons(cma_port_from_service_id(service_id)); - - switch (cma_get_ip_ver(hdr)) { - case 4: - cma_save_ip4_info((struct sockaddr_in *)src_addr, - (struct sockaddr_in *)dst_addr, hdr, port); - break; - case 6: - cma_save_ip6_info((struct sockaddr_in6 *)src_addr, - (struct sockaddr_in6 *)dst_addr, hdr, port); - break; - default: - return -EAFNOSUPPORT; - } - - return 0; -} - -static int cma_save_net_info(struct sockaddr *src_addr, - struct sockaddr *dst_addr, - struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event, - sa_family_t sa_family, __be64 service_id) -{ - if (sa_family == AF_IB) { - if (ib_event->event == IB_CM_REQ_RECEIVED) - cma_save_ib_info(src_addr, dst_addr, listen_id, - ib_event->param.req_rcvd.primary_path); - else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) - cma_save_ib_info(src_addr, dst_addr, listen_id, NULL); - return 0; - } - - return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id); -} - -static int cma_save_req_info(const struct ib_cm_event *ib_event, - struct cma_req_info *req) -{ - const struct ib_cm_req_event_param *req_param = - &ib_event->param.req_rcvd; - const struct ib_cm_sidr_req_event_param *sidr_param = - &ib_event->param.sidr_req_rcvd; - - switch (ib_event->event) { - case IB_CM_REQ_RECEIVED: - req->device = req_param->listen_id->device; - req->port = req_param->port; - memcpy(&req->local_gid, &req_param->primary_path->sgid, - sizeof(req->local_gid)); - req->has_gid = true; - req->service_id = req_param->primary_path->service_id; - req->pkey = be16_to_cpu(req_param->primary_path->pkey); - if (req->pkey != req_param->bth_pkey) - pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" - "RDMA CMA: in the future this may cause the request to be dropped\n", - req_param->bth_pkey, req->pkey); - break; - case IB_CM_SIDR_REQ_RECEIVED: - req->device = sidr_param->listen_id->device; - req->port = sidr_param->port; - req->has_gid = false; - req->service_id = 
sidr_param->service_id; - req->pkey = sidr_param->pkey; - if (req->pkey != sidr_param->bth_pkey) - pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" - "RDMA CMA: in the future this may cause the request to be dropped\n", - sidr_param->bth_pkey, req->pkey); - break; - default: - return -EINVAL; - } - - return 0; -} - -static bool validate_ipv4_net_dev(struct net_device *net_dev, - const struct sockaddr_in *dst_addr, - const struct sockaddr_in *src_addr) -{ -#ifdef INET - struct sockaddr_in dst_tmp = *dst_addr; - __be32 daddr = dst_addr->sin_addr.s_addr, - saddr = src_addr->sin_addr.s_addr; - struct net_device *src_dev; - struct rtentry *rte; - bool ret; - - if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || - ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) || - ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) || - ipv4_is_loopback(saddr)) - return false; - - src_dev = ip_dev_find(net_dev->if_vnet, saddr); - if (src_dev != net_dev) - return false; - - /* - * Make sure the socket address length field - * is set, else rtalloc1() will fail. - */ - dst_tmp.sin_len = sizeof(dst_tmp); - - CURVNET_SET(net_dev->if_vnet); - rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0); - CURVNET_RESTORE(); - if (rte != NULL) { - ret = (rte->rt_ifp == net_dev); - RTFREE_LOCKED(rte); - } else { - ret = false; - } - return ret; -#else - return false; -#endif -} - -static bool validate_ipv6_net_dev(struct net_device *net_dev, - const struct sockaddr_in6 *dst_addr, - const struct sockaddr_in6 *src_addr) -{ -#ifdef INET6 - struct sockaddr_in6 dst_tmp = *dst_addr; - struct in6_addr in6_addr = src_addr->sin6_addr; - struct net_device *src_dev; - struct rtentry *rte; - bool ret; - - /* embed scope ID */ - in6_addr.s6_addr[3] = src_addr->sin6_scope_id; - - src_dev = ip6_dev_find(net_dev->if_vnet, in6_addr); - if (src_dev != net_dev) - return false; - - /* - * Make sure the socket address length field - * is set, else rtalloc1() will fail. - */ - dst_tmp.sin6_len = sizeof(dst_tmp); - - CURVNET_SET(net_dev->if_vnet); - rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0); - CURVNET_RESTORE(); - if (rte != NULL) { - ret = (rte->rt_ifp == net_dev); - RTFREE_LOCKED(rte); - } else { - ret = false; - } - return ret; -#else - return false; -#endif -} - -static bool validate_net_dev(struct net_device *net_dev, - const struct sockaddr *daddr, - const struct sockaddr *saddr) -{ - const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr; - const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr; - const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr; - const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr; - - switch (daddr->sa_family) { - case AF_INET: - return saddr->sa_family == AF_INET && - validate_ipv4_net_dev(net_dev, daddr4, saddr4); - - case AF_INET6: - return saddr->sa_family == AF_INET6 && - validate_ipv6_net_dev(net_dev, daddr6, saddr6); - - default: - return false; - } -} - -static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, - const struct cma_req_info *req) -{ - struct sockaddr_storage listen_addr_storage, src_addr_storage; - struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage, - *src_addr = (struct sockaddr *)&src_addr_storage; - struct net_device *net_dev; - const union ib_gid *gid = req->has_gid ? 
&req->local_gid : NULL; - int err; - - err = cma_save_ip_info(listen_addr, src_addr, ib_event, - req->service_id); - if (err) - return ERR_PTR(err); - - net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey, - gid, listen_addr); - if (!net_dev) - return ERR_PTR(-ENODEV); - - if (!validate_net_dev(net_dev, listen_addr, src_addr)) { - dev_put(net_dev); - return ERR_PTR(-EHOSTUNREACH); - } - - return net_dev; -} - -static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id) -{ - return (be64_to_cpu(service_id) >> 16) & 0xffff; -} - -static bool cma_match_private_data(struct rdma_id_private *id_priv, - const struct cma_hdr *hdr) -{ - struct sockaddr *addr = cma_src_addr(id_priv); - __be32 ip4_addr; - struct in6_addr ip6_addr; - - if (cma_any_addr(addr) && !id_priv->afonly) - return true; - - switch (addr->sa_family) { - case AF_INET: - ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr; - if (cma_get_ip_ver(hdr) != 4) - return false; - if (!cma_any_addr(addr) && - hdr->dst_addr.ip4.addr != ip4_addr) - return false; - break; - case AF_INET6: - ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr; - if (cma_get_ip_ver(hdr) != 6) - return false; - if (!cma_any_addr(addr) && - memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr))) - return false; - break; - case AF_IB: - return true; - default: - return false; - } - - return true; -} - -static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num) -{ - enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num); - enum rdma_transport_type transport = - rdma_node_get_transport(device->node_type); - - return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB; -} - -static bool cma_protocol_roce(const struct rdma_cm_id *id) -{ - struct ib_device *device = id->device; - const int port_num = id->port_num ?: rdma_start_port(device); - - return cma_protocol_roce_dev_port(device, port_num); -} - -static bool cma_match_net_dev(const struct rdma_cm_id *id, - const struct net_device *net_dev, - u8 port_num) -{ - const struct rdma_addr *addr = &id->route.addr; - - if (!net_dev) - /* This request is an AF_IB request or a RoCE request */ - return (!id->port_num || id->port_num == port_num) && - (addr->src_addr.ss_family == AF_IB || - cma_protocol_roce_dev_port(id->device, port_num)); - - return !addr->dev_addr.bound_dev_if || - (net_eq(dev_net(net_dev), addr->dev_addr.net) && - addr->dev_addr.bound_dev_if == net_dev->if_index); -} - -static struct rdma_id_private *cma_find_listener( - const struct rdma_bind_list *bind_list, - const struct ib_cm_id *cm_id, - const struct ib_cm_event *ib_event, - const struct cma_req_info *req, - const struct net_device *net_dev) -{ - struct rdma_id_private *id_priv, *id_priv_dev; - - if (!bind_list) - return ERR_PTR(-EINVAL); - - hlist_for_each_entry(id_priv, &bind_list->owners, node) { - if (cma_match_private_data(id_priv, ib_event->private_data)) { - if (id_priv->id.device == cm_id->device && - cma_match_net_dev(&id_priv->id, net_dev, req->port)) - return id_priv; - list_for_each_entry(id_priv_dev, - &id_priv->listen_list, - listen_list) { - if (id_priv_dev->id.device == cm_id->device && - cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) - return id_priv_dev; - } - } - } - - return ERR_PTR(-EINVAL); -} - -static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, - struct ib_cm_event *ib_event, - struct net_device **net_dev) -{ - struct cma_req_info req; - struct rdma_bind_list *bind_list; - struct rdma_id_private *id_priv; - int err; 
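- /*
-  * Listener demux, as a rough sketch of the calls below:
-  * cma_save_req_info() has captured the wire-level parameters of the
-  * CM request, cma_get_net_dev() resolves and route-validates the
-  * ingress net_device, and the port-space/port pair recovered from
-  * the service ID selects an rdma_bind_list via cma_ps_find();
-  * cma_find_listener() then walks that bind list (and each owner's
-  * per-device listen_list) for an id whose private data and
-  * net_device match the request.
-  */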
- - err = cma_save_req_info(ib_event, &req); - if (err) - return ERR_PTR(err); - - *net_dev = cma_get_net_dev(ib_event, &req); - if (IS_ERR(*net_dev)) { - if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { - /* Assuming the protocol is AF_IB */ - *net_dev = NULL; - } else if (cma_protocol_roce_dev_port(req.device, req.port)) { - /* TODO find the net dev matching the request parameters - * through the RoCE GID table */ - *net_dev = NULL; - } else { - return ERR_CAST(*net_dev); - } - } - - bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, - rdma_ps_from_service_id(req.service_id), - cma_port_from_service_id(req.service_id)); - id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); - if (IS_ERR(id_priv) && *net_dev) { - dev_put(*net_dev); - *net_dev = NULL; - } - - return id_priv; -} - -static inline int cma_user_data_offset(struct rdma_id_private *id_priv) -{ - return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); -} - -static void cma_cancel_route(struct rdma_id_private *id_priv) -{ - if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { - if (id_priv->query) - ib_sa_cancel_query(id_priv->query_id, id_priv->query); - } -} - -static void cma_cancel_listens(struct rdma_id_private *id_priv) -{ - struct rdma_id_private *dev_id_priv; - - /* - * Remove from listen_any_list to prevent added devices from spawning - * additional listen requests. - */ - mutex_lock(&lock); - list_del(&id_priv->list); - - while (!list_empty(&id_priv->listen_list)) { - dev_id_priv = list_entry(id_priv->listen_list.next, - struct rdma_id_private, listen_list); - /* sync with device removal to avoid duplicate destruction */ - list_del_init(&dev_id_priv->list); - list_del(&dev_id_priv->listen_list); - mutex_unlock(&lock); - - rdma_destroy_id(&dev_id_priv->id); - mutex_lock(&lock); - } - mutex_unlock(&lock); -} - -static void cma_cancel_operation(struct rdma_id_private *id_priv, - enum rdma_cm_state state) -{ - switch (state) { - case RDMA_CM_ADDR_QUERY: - rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); - break; - case RDMA_CM_ROUTE_QUERY: - cma_cancel_route(id_priv); - break; - case RDMA_CM_LISTEN: - if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) - cma_cancel_listens(id_priv); - break; - default: - break; - } -} - -static void cma_release_port(struct rdma_id_private *id_priv) -{ - struct rdma_bind_list *bind_list = id_priv->bind_list; - struct vnet *net = id_priv->id.route.addr.dev_addr.net; - - if (!bind_list) - return; - - mutex_lock(&lock); - hlist_del(&id_priv->node); - if (hlist_empty(&bind_list->owners)) { - cma_ps_remove(net, bind_list->ps, bind_list->port); - kfree(bind_list); - } - mutex_unlock(&lock); -} - -static void cma_leave_mc_groups(struct rdma_id_private *id_priv) -{ - struct cma_multicast *mc; - - while (!list_empty(&id_priv->mc_list)) { - mc = container_of(id_priv->mc_list.next, - struct cma_multicast, list); - list_del(&mc->list); - if (rdma_cap_ib_mcast(id_priv->cma_dev->device, - id_priv->id.port_num)) { - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); - } else { - if (mc->igmp_joined) { - struct rdma_dev_addr *dev_addr = - &id_priv->id.route.addr.dev_addr; - struct net_device *ndev = NULL; - - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(dev_addr->net, - dev_addr->bound_dev_if); - if (ndev) { - dev_put(ndev); - } - } - kref_put(&mc->mcref, release_mc); - } - } -} - -void rdma_destroy_id(struct rdma_cm_id *id) -{ - struct rdma_id_private *id_priv; - enum rdma_cm_state state; - - id_priv = container_of(id, struct 
rdma_id_private, id); - state = cma_exch(id_priv, RDMA_CM_DESTROYING); - cma_cancel_operation(id_priv, state); - - /* - * Wait for any active callback to finish. New callbacks will find - * the id_priv state set to destroying and abort. - */ - mutex_lock(&id_priv->handler_mutex); - mutex_unlock(&id_priv->handler_mutex); - - if (id_priv->cma_dev) { - if (rdma_cap_ib_cm(id_priv->id.device, 1)) { - if (id_priv->cm_id.ib) - ib_destroy_cm_id(id_priv->cm_id.ib); - } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { - if (id_priv->cm_id.iw) - iw_destroy_cm_id(id_priv->cm_id.iw); - } - cma_leave_mc_groups(id_priv); - cma_release_dev(id_priv); - } - - cma_release_port(id_priv); - cma_deref_id(id_priv); - wait_for_completion(&id_priv->comp); - - if (id_priv->internal_id) - cma_deref_id(id_priv->id.context); - - kfree(id_priv->id.route.path_rec); - kfree(id_priv); -} -EXPORT_SYMBOL(rdma_destroy_id); - -static int cma_rep_recv(struct rdma_id_private *id_priv) -{ - int ret; - - ret = cma_modify_qp_rtr(id_priv, NULL); - if (ret) - goto reject; - - ret = cma_modify_qp_rts(id_priv, NULL); - if (ret) - goto reject; - - ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); - if (ret) - goto reject; - - return 0; -reject: - cma_modify_qp_err(id_priv); - ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, - NULL, 0, NULL, 0); - return ret; -} - -static void cma_set_rep_event_data(struct rdma_cm_event *event, - struct ib_cm_rep_event_param *rep_data, - void *private_data) -{ - event->param.conn.private_data = private_data; - event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; - event->param.conn.responder_resources = rep_data->responder_resources; - event->param.conn.initiator_depth = rep_data->initiator_depth; - event->param.conn.flow_control = rep_data->flow_control; - event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; - event->param.conn.srq = rep_data->srq; - event->param.conn.qp_num = rep_data->remote_qpn; -} - -static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) -{ - struct rdma_id_private *id_priv = cm_id->context; - struct rdma_cm_event event; - int ret = 0; - - mutex_lock(&id_priv->handler_mutex); - if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && - id_priv->state != RDMA_CM_CONNECT) || - (ib_event->event == IB_CM_TIMEWAIT_EXIT && - id_priv->state != RDMA_CM_DISCONNECT)) - goto out; - - memset(&event, 0, sizeof event); - switch (ib_event->event) { - case IB_CM_REQ_ERROR: - case IB_CM_REP_ERROR: - event.event = RDMA_CM_EVENT_UNREACHABLE; - event.status = -ETIMEDOUT; - break; - case IB_CM_REP_RECEIVED: - if (id_priv->id.qp) { - event.status = cma_rep_recv(id_priv); - event.event = event.status ? 
RDMA_CM_EVENT_CONNECT_ERROR : - RDMA_CM_EVENT_ESTABLISHED; - } else { - event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; - } - cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, - ib_event->private_data); - break; - case IB_CM_RTU_RECEIVED: - case IB_CM_USER_ESTABLISHED: - event.event = RDMA_CM_EVENT_ESTABLISHED; - break; - case IB_CM_DREQ_ERROR: - event.status = -ETIMEDOUT; /* fall through */ - case IB_CM_DREQ_RECEIVED: - case IB_CM_DREP_RECEIVED: - if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, - RDMA_CM_DISCONNECT)) - goto out; - event.event = RDMA_CM_EVENT_DISCONNECTED; - break; - case IB_CM_TIMEWAIT_EXIT: - event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; - break; - case IB_CM_MRA_RECEIVED: - /* ignore event */ - goto out; - case IB_CM_REJ_RECEIVED: - cma_modify_qp_err(id_priv); - event.status = ib_event->param.rej_rcvd.reason; - event.event = RDMA_CM_EVENT_REJECTED; - event.param.conn.private_data = ib_event->private_data; - event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; - break; - default: - pr_err("RDMA CMA: unexpected IB CM event: %d\n", - ib_event->event); - goto out; - } - - ret = id_priv->id.event_handler(&id_priv->id, &event); - if (ret) { - /* Destroy the CM ID by returning a non-zero value. */ - id_priv->cm_id.ib = NULL; - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); - return ret; - } -out: - mutex_unlock(&id_priv->handler_mutex); - return ret; -} - -static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event, - struct net_device *net_dev) -{ - struct rdma_id_private *id_priv; - struct rdma_cm_id *id; - struct rdma_route *rt; - const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; - const __be64 service_id = - ib_event->param.req_rcvd.primary_path->service_id; - int ret; - - id = rdma_create_id(listen_id->route.addr.dev_addr.net, - listen_id->event_handler, listen_id->context, - listen_id->ps, ib_event->param.req_rcvd.qp_type); - if (IS_ERR(id)) - return NULL; - - id_priv = container_of(id, struct rdma_id_private, id); - if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, - (struct sockaddr *)&id->route.addr.dst_addr, - listen_id, ib_event, ss_family, service_id)) - goto err; - - rt = &id->route; - rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 
2 : 1; - rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, - GFP_KERNEL); - if (!rt->path_rec) - goto err; - - rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; - if (rt->num_paths == 2) - rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; - - if (net_dev) { - ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); - if (ret) - goto err; - } else { - if (!cma_protocol_roce(listen_id) && - cma_any_addr(cma_src_addr(id_priv))) { - rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; - rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); - ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); - } else if (!cma_any_addr(cma_src_addr(id_priv))) { - ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); - if (ret) - goto err; - } - } - rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); - - id_priv->state = RDMA_CM_CONNECT; - return id_priv; - -err: - rdma_destroy_id(id); - return NULL; -} - -static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, - struct ib_cm_event *ib_event, - struct net_device *net_dev) -{ - struct rdma_id_private *id_priv; - struct rdma_cm_id *id; - const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; - struct vnet *net = listen_id->route.addr.dev_addr.net; - int ret; - - id = rdma_create_id(net, listen_id->event_handler, listen_id->context, - listen_id->ps, IB_QPT_UD); - if (IS_ERR(id)) - return NULL; - - id_priv = container_of(id, struct rdma_id_private, id); - if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, - (struct sockaddr *)&id->route.addr.dst_addr, - listen_id, ib_event, ss_family, - ib_event->param.sidr_req_rcvd.service_id)) - goto err; - - if (net_dev) { - ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); - if (ret) - goto err; - } else { - if (!cma_any_addr(cma_src_addr(id_priv))) { - ret = cma_translate_addr(cma_src_addr(id_priv), - &id->route.addr.dev_addr); - if (ret) - goto err; - } - } - - id_priv->state = RDMA_CM_CONNECT; - return id_priv; -err: - rdma_destroy_id(id); - return NULL; -} - -static void cma_set_req_event_data(struct rdma_cm_event *event, - struct ib_cm_req_event_param *req_data, - void *private_data, int offset) -{ - event->param.conn.private_data = (char *)private_data + offset; - event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; - event->param.conn.responder_resources = req_data->responder_resources; - event->param.conn.initiator_depth = req_data->initiator_depth; - event->param.conn.flow_control = req_data->flow_control; - event->param.conn.retry_count = req_data->retry_count; - event->param.conn.rnr_retry_count = req_data->rnr_retry_count; - event->param.conn.srq = req_data->srq; - event->param.conn.qp_num = req_data->remote_qpn; -} - -static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) -{ - return (((ib_event->event == IB_CM_REQ_RECEIVED) && - (ib_event->param.req_rcvd.qp_type == id->qp_type)) || - ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && - (id->qp_type == IB_QPT_UD)) || - (!id->qp_type)); -} - -static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) -{ - struct rdma_id_private *listen_id, *conn_id = NULL; - struct rdma_cm_event event; - struct net_device *net_dev; - int offset, ret; - - listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); - if (IS_ERR(listen_id)) - return PTR_ERR(listen_id); - - if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { - ret = -EINVAL; - goto net_dev_put; - } - - 
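- /*
-  * Lifetime/locking sketch for the request path below: the
-  * listener's handler_mutex is held while the new conn_id is built,
-  * conn_id's own handler_mutex is then taken nested, and
-  * conn_id->refcount is bumped before the user's event handler runs
-  * so that a concurrent rdma_destroy_id() cannot free it
-  * mid-callback.  A non-zero handler return tears the id down via
-  * the err3/err2 labels, and for connected QP types an MRA
-  * (CMA_CM_MRA_SETTING) is sent to stretch the peer's CM timeout
-  * while the request is handed up.
-  */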
mutex_lock(&listen_id->handler_mutex); - if (listen_id->state != RDMA_CM_LISTEN) { - ret = -ECONNABORTED; - goto err1; - } - - memset(&event, 0, sizeof event); - offset = cma_user_data_offset(listen_id); - event.event = RDMA_CM_EVENT_CONNECT_REQUEST; - if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { - conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); - event.param.ud.private_data = (char *)ib_event->private_data + offset; - event.param.ud.private_data_len = - IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; - } else { - conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); - cma_set_req_event_data(&event, &ib_event->param.req_rcvd, - ib_event->private_data, offset); - } - if (!conn_id) { - ret = -ENOMEM; - goto err1; - } - - mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); - ret = cma_acquire_dev(conn_id, listen_id); - if (ret) - goto err2; - - conn_id->cm_id.ib = cm_id; - cm_id->context = conn_id; - cm_id->cm_handler = cma_ib_handler; - - /* - * Protect against the user destroying conn_id from another thread - * until we're done accessing it. - */ - atomic_inc(&conn_id->refcount); - ret = conn_id->id.event_handler(&conn_id->id, &event); - if (ret) - goto err3; - /* - * Acquire mutex to prevent user executing rdma_destroy_id() - * while we're accessing the cm_id. - */ - mutex_lock(&lock); - if (cma_comp(conn_id, RDMA_CM_CONNECT) && - (conn_id->id.qp_type != IB_QPT_UD)) - ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); - mutex_unlock(&lock); - mutex_unlock(&conn_id->handler_mutex); - mutex_unlock(&listen_id->handler_mutex); - cma_deref_id(conn_id); - if (net_dev) - dev_put(net_dev); - return 0; - -err3: - cma_deref_id(conn_id); - /* Destroy the CM ID by returning a non-zero value. */ - conn_id->cm_id.ib = NULL; -err2: - cma_exch(conn_id, RDMA_CM_DESTROYING); - mutex_unlock(&conn_id->handler_mutex); -err1: - mutex_unlock(&listen_id->handler_mutex); - if (conn_id) - rdma_destroy_id(&conn_id->id); - -net_dev_put: - if (net_dev) - dev_put(net_dev); - - return ret; -} - -__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) -{ - if (addr->sa_family == AF_IB) - return ((struct sockaddr_ib *) addr)->sib_sid; - - return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); -} -EXPORT_SYMBOL(rdma_get_service_id); - -static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) -{ - struct rdma_id_private *id_priv = iw_id->context; - struct rdma_cm_event event; - int ret = 0; - struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; - struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; - - mutex_lock(&id_priv->handler_mutex); - if (id_priv->state != RDMA_CM_CONNECT) - goto out; - - memset(&event, 0, sizeof event); - switch (iw_event->event) { - case IW_CM_EVENT_CLOSE: - event.event = RDMA_CM_EVENT_DISCONNECTED; - break; - case IW_CM_EVENT_CONNECT_REPLY: - memcpy(cma_src_addr(id_priv), laddr, - rdma_addr_size(laddr)); - memcpy(cma_dst_addr(id_priv), raddr, - rdma_addr_size(raddr)); - switch (iw_event->status) { - case 0: - event.event = RDMA_CM_EVENT_ESTABLISHED; - event.param.conn.initiator_depth = iw_event->ird; - event.param.conn.responder_resources = iw_event->ord; - break; - case -ECONNRESET: - case -ECONNREFUSED: - event.event = RDMA_CM_EVENT_REJECTED; - break; - case -ETIMEDOUT: - event.event = RDMA_CM_EVENT_UNREACHABLE; - break; - default: - event.event = RDMA_CM_EVENT_CONNECT_ERROR; - break; - } - break; - case IW_CM_EVENT_ESTABLISHED: - event.event = RDMA_CM_EVENT_ESTABLISHED; - 
event.param.conn.initiator_depth = iw_event->ird; - event.param.conn.responder_resources = iw_event->ord; - break; - default: - BUG_ON(1); - } - - event.status = iw_event->status; - event.param.conn.private_data = iw_event->private_data; - event.param.conn.private_data_len = iw_event->private_data_len; - ret = id_priv->id.event_handler(&id_priv->id, &event); - if (ret) { - /* Destroy the CM ID by returning a non-zero value. */ - id_priv->cm_id.iw = NULL; - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); - return ret; - } - -out: - mutex_unlock(&id_priv->handler_mutex); - return ret; -} - -static int iw_conn_req_handler(struct iw_cm_id *cm_id, - struct iw_cm_event *iw_event) -{ - struct rdma_cm_id *new_cm_id; - struct rdma_id_private *listen_id, *conn_id; - struct rdma_cm_event event; - int ret = -ECONNABORTED; - struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; - struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; - - listen_id = cm_id->context; - - mutex_lock(&listen_id->handler_mutex); - if (listen_id->state != RDMA_CM_LISTEN) - goto out; - - /* Create a new RDMA id for the new IW CM ID */ - new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, - listen_id->id.event_handler, - listen_id->id.context, - RDMA_PS_TCP, IB_QPT_RC); - if (IS_ERR(new_cm_id)) { - ret = -ENOMEM; - goto out; - } - conn_id = container_of(new_cm_id, struct rdma_id_private, id); - mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); - conn_id->state = RDMA_CM_CONNECT; - - ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL); - if (ret) { - mutex_unlock(&conn_id->handler_mutex); - rdma_destroy_id(new_cm_id); - goto out; - } - - ret = cma_acquire_dev(conn_id, listen_id); - if (ret) { - mutex_unlock(&conn_id->handler_mutex); - rdma_destroy_id(new_cm_id); - goto out; - } - - conn_id->cm_id.iw = cm_id; - cm_id->context = conn_id; - cm_id->cm_handler = cma_iw_handler; - - memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); - memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); - - memset(&event, 0, sizeof event); - event.event = RDMA_CM_EVENT_CONNECT_REQUEST; - event.param.conn.private_data = iw_event->private_data; - event.param.conn.private_data_len = iw_event->private_data_len; - event.param.conn.initiator_depth = iw_event->ird; - event.param.conn.responder_resources = iw_event->ord; - - /* - * Protect against the user destroying conn_id from another thread - * until we're done accessing it. 
- */ - atomic_inc(&conn_id->refcount); - ret = conn_id->id.event_handler(&conn_id->id, &event); - if (ret) { - /* User wants to destroy the CM ID */ - conn_id->cm_id.iw = NULL; - cma_exch(conn_id, RDMA_CM_DESTROYING); - mutex_unlock(&conn_id->handler_mutex); - cma_deref_id(conn_id); - rdma_destroy_id(&conn_id->id); - goto out; - } - - mutex_unlock(&conn_id->handler_mutex); - cma_deref_id(conn_id); - -out: - mutex_unlock(&listen_id->handler_mutex); - return ret; -} - -static int cma_ib_listen(struct rdma_id_private *id_priv) -{ - struct sockaddr *addr; - struct ib_cm_id *id; - __be64 svc_id; - - addr = cma_src_addr(id_priv); - svc_id = rdma_get_service_id(&id_priv->id, addr); - id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); - if (IS_ERR(id)) - return PTR_ERR(id); - id_priv->cm_id.ib = id; - - return 0; -} - -static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) -{ - int ret; - struct iw_cm_id *id; - - id = iw_create_cm_id(id_priv->id.device, - iw_conn_req_handler, - id_priv); - if (IS_ERR(id)) - return PTR_ERR(id); - - id->tos = id_priv->tos; - id_priv->cm_id.iw = id; - - memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), - rdma_addr_size(cma_src_addr(id_priv))); - - ret = iw_cm_listen(id_priv->cm_id.iw, backlog); - - if (ret) { - iw_destroy_cm_id(id_priv->cm_id.iw); - id_priv->cm_id.iw = NULL; - } - - return ret; -} - -static int cma_listen_handler(struct rdma_cm_id *id, - struct rdma_cm_event *event) -{ - struct rdma_id_private *id_priv = id->context; - - id->context = id_priv->id.context; - id->event_handler = id_priv->id.event_handler; - return id_priv->id.event_handler(id, event); -} - -static void cma_listen_on_dev(struct rdma_id_private *id_priv, - struct cma_device *cma_dev) -{ - struct rdma_id_private *dev_id_priv; - struct rdma_cm_id *id; - struct vnet *net = id_priv->id.route.addr.dev_addr.net; - int ret; - - if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) - return; - - id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, - id_priv->id.qp_type); - if (IS_ERR(id)) - return; - - dev_id_priv = container_of(id, struct rdma_id_private, id); - - dev_id_priv->state = RDMA_CM_ADDR_BOUND; - memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), - rdma_addr_size(cma_src_addr(id_priv))); - - _cma_attach_to_dev(dev_id_priv, cma_dev); - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); - atomic_inc(&id_priv->refcount); - dev_id_priv->internal_id = 1; - dev_id_priv->afonly = id_priv->afonly; - - ret = rdma_listen(id, id_priv->backlog); - if (ret) - pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", - ret, cma_dev->device->name); -} - -static void cma_listen_on_all(struct rdma_id_private *id_priv) -{ - struct cma_device *cma_dev; - - mutex_lock(&lock); - list_add_tail(&id_priv->list, &listen_any_list); - list_for_each_entry(cma_dev, &dev_list, list) - cma_listen_on_dev(id_priv, cma_dev); - mutex_unlock(&lock); -} - -void rdma_set_service_type(struct rdma_cm_id *id, int tos) -{ - struct rdma_id_private *id_priv; - - id_priv = container_of(id, struct rdma_id_private, id); - id_priv->tos = (u8) tos; -} -EXPORT_SYMBOL(rdma_set_service_type); - -static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, - void *context) -{ - struct cma_work *work = context; - struct rdma_route *route; - - route = &work->id->id.route; - - if (!status) { - route->num_paths = 1; - *route->path_rec = *path_rec; - } else { - work->old_state = RDMA_CM_ROUTE_QUERY; - 
work->new_state = RDMA_CM_ADDR_RESOLVED; - work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; - work->event.status = status; - } - - queue_work(cma_wq, &work->work); -} - -static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, - struct cma_work *work) -{ - struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; - struct ib_sa_path_rec path_rec; - ib_sa_comp_mask comp_mask; - struct sockaddr_in6 *sin6; - struct sockaddr_ib *sib; - - memset(&path_rec, 0, sizeof path_rec); - rdma_addr_get_sgid(dev_addr, &path_rec.sgid); - rdma_addr_get_dgid(dev_addr, &path_rec.dgid); - path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); - path_rec.numb_path = 1; - path_rec.reversible = 1; - path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); - - comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | - IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | - IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; - - switch (cma_family(id_priv)) { - case AF_INET: - path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); - comp_mask |= IB_SA_PATH_REC_QOS_CLASS; - break; - case AF_INET6: - sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); - path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); - comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; - break; - case AF_IB: - sib = (struct sockaddr_ib *) cma_src_addr(id_priv); - path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); - comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; - break; - } - - id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, - id_priv->id.port_num, &path_rec, - comp_mask, timeout_ms, - GFP_KERNEL, cma_query_handler, - work, &id_priv->query); - - return (id_priv->query_id < 0) ? id_priv->query_id : 0; -} - -static void cma_work_handler(struct work_struct *_work) -{ - struct cma_work *work = container_of(_work, struct cma_work, work); - struct rdma_id_private *id_priv = work->id; - int destroy = 0; - - mutex_lock(&id_priv->handler_mutex); - if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) - goto out; - - if (id_priv->id.event_handler(&id_priv->id, &work->event)) { - cma_exch(id_priv, RDMA_CM_DESTROYING); - destroy = 1; - } -out: - mutex_unlock(&id_priv->handler_mutex); - cma_deref_id(id_priv); - if (destroy) - rdma_destroy_id(&id_priv->id); - kfree(work); -} - -static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) -{ - struct rdma_route *route = &id_priv->id.route; - struct cma_work *work; - int ret; - - work = kzalloc(sizeof *work, GFP_KERNEL); - if (!work) - return -ENOMEM; - - work->id = id_priv; - INIT_WORK(&work->work, cma_work_handler); - work->old_state = RDMA_CM_ROUTE_QUERY; - work->new_state = RDMA_CM_ROUTE_RESOLVED; - work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; - - route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); - if (!route->path_rec) { - ret = -ENOMEM; - goto err1; - } - - ret = cma_query_ib_route(id_priv, timeout_ms, work); - if (ret) - goto err2; - - return 0; -err2: - kfree(route->path_rec); - route->path_rec = NULL; -err1: - kfree(work); - return ret; -} - -int rdma_set_ib_paths(struct rdma_cm_id *id, - struct ib_sa_path_rec *path_rec, int num_paths) -{ - struct rdma_id_private *id_priv; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, - RDMA_CM_ROUTE_RESOLVED)) - return -EINVAL; - - id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, - GFP_KERNEL); - if (!id->route.path_rec) { - 
ret = -ENOMEM; - goto err; - } - - id->route.num_paths = num_paths; - return 0; -err: - cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); - return ret; -} -EXPORT_SYMBOL(rdma_set_ib_paths); - -static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) -{ - struct cma_work *work; - - work = kzalloc(sizeof *work, GFP_KERNEL); - if (!work) - return -ENOMEM; - - work->id = id_priv; - INIT_WORK(&work->work, cma_work_handler); - work->old_state = RDMA_CM_ROUTE_QUERY; - work->new_state = RDMA_CM_ROUTE_RESOLVED; - work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; - queue_work(cma_wq, &work->work); - return 0; -} - -static int iboe_tos_to_sl(struct net_device *ndev, int tos) -{ - /* TODO: Implement this function */ - return 0; -} - -static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, - unsigned long supported_gids, - enum ib_gid_type default_gid) -{ - if ((network_type == RDMA_NETWORK_IPV4 || - network_type == RDMA_NETWORK_IPV6) && - test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) - return IB_GID_TYPE_ROCE_UDP_ENCAP; - - return default_gid; -} - -static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) -{ - struct rdma_route *route = &id_priv->id.route; - struct rdma_addr *addr = &route->addr; - struct cma_work *work; - int ret; - struct net_device *ndev = NULL; - - - work = kzalloc(sizeof *work, GFP_KERNEL); - if (!work) - return -ENOMEM; - - work->id = id_priv; - INIT_WORK(&work->work, cma_work_handler); - - route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); - if (!route->path_rec) { - ret = -ENOMEM; - goto err1; - } - - route->num_paths = 1; - - if (addr->dev_addr.bound_dev_if) { - unsigned long supported_gids; - - ndev = dev_get_by_index(addr->dev_addr.net, - addr->dev_addr.bound_dev_if); - if (!ndev) { - ret = -ENODEV; - goto err2; - } - - if (ndev->if_flags & IFF_LOOPBACK) { - dev_put(ndev); - if (!id_priv->id.device->get_netdev) { - ret = -EOPNOTSUPP; - goto err2; - } - - ndev = id_priv->id.device->get_netdev(id_priv->id.device, - id_priv->id.port_num); - if (!ndev) { - ret = -ENODEV; - goto err2; - } - } - - route->path_rec->net = ndev->if_vnet; - route->path_rec->ifindex = ndev->if_index; - supported_gids = roce_gid_type_mask_support(id_priv->id.device, - id_priv->id.port_num); - route->path_rec->gid_type = - cma_route_gid_type(addr->dev_addr.network, - supported_gids, - id_priv->gid_type); - } - if (!ndev) { - ret = -ENODEV; - goto err2; - } - - memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); - - rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, - &route->path_rec->sgid); - rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, - &route->path_rec->dgid); - - /* Use the hint from IP Stack to select GID Type */ - if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network)) - route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network); - if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) - /* TODO: get the hoplimit from the inet/inet6 device */ - route->path_rec->hop_limit = addr->dev_addr.hoplimit; - else - route->path_rec->hop_limit = 1; - route->path_rec->reversible = 1; - route->path_rec->pkey = cpu_to_be16(0xffff); - route->path_rec->mtu_selector = IB_SA_EQ; - route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos); - route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu); - route->path_rec->rate_selector = IB_SA_EQ; - route->path_rec->rate = iboe_get_rate(ndev); - dev_put(ndev); - 
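- /* done with ndev; the remaining path record fields are fixed constants */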
route->path_rec->packet_life_time_selector = IB_SA_EQ; - route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; - if (!route->path_rec->mtu) { - ret = -EINVAL; - goto err2; - } - - work->old_state = RDMA_CM_ROUTE_QUERY; - work->new_state = RDMA_CM_ROUTE_RESOLVED; - work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; - work->event.status = 0; - - queue_work(cma_wq, &work->work); - - return 0; - -err2: - kfree(route->path_rec); - route->path_rec = NULL; -err1: - kfree(work); - return ret; -} - -int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) -{ - struct rdma_id_private *id_priv; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) - return -EINVAL; - - atomic_inc(&id_priv->refcount); - if (rdma_cap_ib_sa(id->device, id->port_num)) - ret = cma_resolve_ib_route(id_priv, timeout_ms); - else if (rdma_protocol_roce(id->device, id->port_num)) - ret = cma_resolve_iboe_route(id_priv); - else if (rdma_protocol_iwarp(id->device, id->port_num)) - ret = cma_resolve_iw_route(id_priv, timeout_ms); - else - ret = -ENOSYS; - - if (ret) - goto err; - - return 0; -err: - cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); - cma_deref_id(id_priv); - return ret; -} -EXPORT_SYMBOL(rdma_resolve_route); - -static void cma_set_loopback(struct sockaddr *addr) -{ - switch (addr->sa_family) { - case AF_INET: - ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); - break; - case AF_INET6: - ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, - 0, 0, 0, htonl(1)); - break; - default: - ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, - 0, 0, 0, htonl(1)); - break; - } -} - -static int cma_bind_loopback(struct rdma_id_private *id_priv) -{ - struct cma_device *cma_dev, *cur_dev; - struct ib_port_attr port_attr; - union ib_gid gid; - u16 pkey; - int ret; - u8 p; - - cma_dev = NULL; - mutex_lock(&lock); - list_for_each_entry(cur_dev, &dev_list, list) { - if (cma_family(id_priv) == AF_IB && - !rdma_cap_ib_cm(cur_dev->device, 1)) - continue; - - if (!cma_dev) - cma_dev = cur_dev; - - for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { - if (!ib_query_port(cur_dev->device, p, &port_attr) && - port_attr.state == IB_PORT_ACTIVE) { - cma_dev = cur_dev; - goto port_found; - } - } - } - - if (!cma_dev) { - ret = -ENODEV; - goto out; - } - - p = 1; - -port_found: - ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); - if (ret) - goto out; - - ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); - if (ret) - goto out; - - id_priv->id.route.addr.dev_addr.dev_type = - (rdma_protocol_ib(cma_dev->device, p)) ? 
- ARPHRD_INFINIBAND : ARPHRD_ETHER; - - rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); - ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); - id_priv->id.port_num = p; - cma_attach_to_dev(id_priv, cma_dev); - cma_set_loopback(cma_src_addr(id_priv)); -out: - mutex_unlock(&lock); - return ret; -} - -static void addr_handler(int status, struct sockaddr *src_addr, - struct rdma_dev_addr *dev_addr, void *context) -{ - struct rdma_id_private *id_priv = context; - struct rdma_cm_event event; - - memset(&event, 0, sizeof event); - mutex_lock(&id_priv->handler_mutex); - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, - RDMA_CM_ADDR_RESOLVED)) - goto out; - - memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); - if (!status && !id_priv->cma_dev) - status = cma_acquire_dev(id_priv, NULL); - - if (status) { - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, - RDMA_CM_ADDR_BOUND)) - goto out; - event.event = RDMA_CM_EVENT_ADDR_ERROR; - event.status = status; - } else - event.event = RDMA_CM_EVENT_ADDR_RESOLVED; - - if (id_priv->id.event_handler(&id_priv->id, &event)) { - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - cma_deref_id(id_priv); - rdma_destroy_id(&id_priv->id); - return; - } -out: - mutex_unlock(&id_priv->handler_mutex); - cma_deref_id(id_priv); -} - -static int cma_resolve_loopback(struct rdma_id_private *id_priv) -{ - struct cma_work *work; - union ib_gid gid; - int ret; - - work = kzalloc(sizeof *work, GFP_KERNEL); - if (!work) - return -ENOMEM; - - if (!id_priv->cma_dev) { - ret = cma_bind_loopback(id_priv); - if (ret) - goto err; - } - - rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); - rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); - - work->id = id_priv; - INIT_WORK(&work->work, cma_work_handler); - work->old_state = RDMA_CM_ADDR_QUERY; - work->new_state = RDMA_CM_ADDR_RESOLVED; - work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; - queue_work(cma_wq, &work->work); - return 0; -err: - kfree(work); - return ret; -} - -static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) -{ - struct cma_work *work; - int ret; - - work = kzalloc(sizeof *work, GFP_KERNEL); - if (!work) - return -ENOMEM; - - if (!id_priv->cma_dev) { - ret = cma_resolve_ib_dev(id_priv); - if (ret) - goto err; - } - - rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) - &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); - - work->id = id_priv; - INIT_WORK(&work->work, cma_work_handler); - work->old_state = RDMA_CM_ADDR_QUERY; - work->new_state = RDMA_CM_ADDR_RESOLVED; - work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; - queue_work(cma_wq, &work->work); - return 0; -err: - kfree(work); - return ret; -} - -static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - struct sockaddr *dst_addr) -{ - if (!src_addr || !src_addr->sa_family) { - src_addr = (struct sockaddr *) &id->route.addr.src_addr; - src_addr->sa_family = dst_addr->sa_family; - if (dst_addr->sa_family == AF_INET6) { - struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; - struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; - src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; - if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr)) - id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; - } else if (dst_addr->sa_family == AF_IB) { - ((struct sockaddr_ib *) src_addr)->sib_pkey = - ((struct sockaddr_ib *) dst_addr)->sib_pkey; - } - } - return rdma_bind_addr(id, 
src_addr); -} - -int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - struct sockaddr *dst_addr, int timeout_ms) -{ - struct rdma_id_private *id_priv; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - if (id_priv->state == RDMA_CM_IDLE) { - ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) - return ret; - } - - if (cma_family(id_priv) != dst_addr->sa_family) - return -EINVAL; - - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) - return -EINVAL; - - atomic_inc(&id_priv->refcount); - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); - if (cma_any_addr(dst_addr)) { - ret = cma_resolve_loopback(id_priv); - } else { - if (dst_addr->sa_family == AF_IB) { - ret = cma_resolve_ib_addr(id_priv); - } else { - ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), - dst_addr, &id->route.addr.dev_addr, - timeout_ms, addr_handler, id_priv); - } - } - if (ret) - goto err; - - return 0; -err: - cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); - cma_deref_id(id_priv); - return ret; -} -EXPORT_SYMBOL(rdma_resolve_addr); - -int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) -{ - struct rdma_id_private *id_priv; - unsigned long flags; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - spin_lock_irqsave(&id_priv->lock, flags); - if (reuse || id_priv->state == RDMA_CM_IDLE) { - id_priv->reuseaddr = reuse; - ret = 0; - } else { - ret = -EINVAL; - } - spin_unlock_irqrestore(&id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(rdma_set_reuseaddr); - -int rdma_set_afonly(struct rdma_cm_id *id, int afonly) -{ - struct rdma_id_private *id_priv; - unsigned long flags; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - spin_lock_irqsave(&id_priv->lock, flags); - if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { - id_priv->options |= (1 << CMA_OPTION_AFONLY); - id_priv->afonly = afonly; - ret = 0; - } else { - ret = -EINVAL; - } - spin_unlock_irqrestore(&id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(rdma_set_afonly); - -static void cma_bind_port(struct rdma_bind_list *bind_list, - struct rdma_id_private *id_priv) -{ - struct sockaddr *addr; - struct sockaddr_ib *sib; - u64 sid, mask; - __be16 port; - - addr = cma_src_addr(id_priv); - port = htons(bind_list->port); - - switch (addr->sa_family) { - case AF_INET: - ((struct sockaddr_in *) addr)->sin_port = port; - break; - case AF_INET6: - ((struct sockaddr_in6 *) addr)->sin6_port = port; - break; - case AF_IB: - sib = (struct sockaddr_ib *) addr; - sid = be64_to_cpu(sib->sib_sid); - mask = be64_to_cpu(sib->sib_sid_mask); - sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); - sib->sib_sid_mask = cpu_to_be64(~0ULL); - break; - } - id_priv->bind_list = bind_list; - hlist_add_head(&id_priv->node, &bind_list->owners); -} - -static int cma_alloc_port(enum rdma_port_space ps, - struct rdma_id_private *id_priv, unsigned short snum) -{ - struct rdma_bind_list *bind_list; - int ret; - - bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); - if (!bind_list) - return -ENOMEM; - - ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, - snum); - if (ret < 0) - goto err; - - bind_list->ps = ps; - bind_list->port = (unsigned short)ret; - cma_bind_port(bind_list, id_priv); - return 0; -err: - kfree(bind_list); - return ret == -ENOSPC ? 
-EADDRNOTAVAIL : ret; -} - -static int cma_alloc_any_port(enum rdma_port_space ps, - struct rdma_id_private *id_priv) -{ - static unsigned int last_used_port; - int low, high, remaining; - unsigned int rover; - struct vnet *net = id_priv->id.route.addr.dev_addr.net; - u32 rand; - - inet_get_local_port_range(net, &low, &high); - remaining = (high - low) + 1; - get_random_bytes(&rand, sizeof(rand)); - rover = rand % remaining + low; -retry: - if (last_used_port != rover && - !cma_ps_find(net, ps, (unsigned short)rover)) { - int ret = cma_alloc_port(ps, id_priv, rover); - /* - * Remember previously used port number in order to avoid - * re-using same port immediately after it is closed. - */ - if (!ret) - last_used_port = rover; - if (ret != -EADDRNOTAVAIL) - return ret; - } - if (--remaining) { - rover++; - if ((rover < low) || (rover > high)) - rover = low; - goto retry; - } - return -EADDRNOTAVAIL; -} - -/* - * Check that the requested port is available. This is called when trying to - * bind to a specific port, or when trying to listen on a bound port. In - * the latter case, the provided id_priv may already be on the bind_list, but - * we still need to check that it's okay to start listening. - */ -static int cma_check_port(struct rdma_bind_list *bind_list, - struct rdma_id_private *id_priv, uint8_t reuseaddr) -{ - struct rdma_id_private *cur_id; - struct sockaddr *addr, *cur_addr; - - addr = cma_src_addr(id_priv); - hlist_for_each_entry(cur_id, &bind_list->owners, node) { - if (id_priv == cur_id) - continue; - - if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && - cur_id->reuseaddr) - continue; - - cur_addr = cma_src_addr(cur_id); - if (id_priv->afonly && cur_id->afonly && - (addr->sa_family != cur_addr->sa_family)) - continue; - - if (cma_any_addr(addr) || cma_any_addr(cur_addr)) - return -EADDRNOTAVAIL; - - if (!cma_addr_cmp(addr, cur_addr)) - return -EADDRINUSE; - } - return 0; -} - -static int cma_use_port(enum rdma_port_space ps, - struct rdma_id_private *id_priv) -{ - struct rdma_bind_list *bind_list; - unsigned short snum; - int ret; - - snum = ntohs(cma_port(cma_src_addr(id_priv))); - if (snum < IPPORT_RESERVED && - priv_check(curthread, PRIV_NETINET_BINDANY) != 0) - return -EACCES; - - bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); - if (!bind_list) { - ret = cma_alloc_port(ps, id_priv, snum); - } else { - ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); - if (!ret) - cma_bind_port(bind_list, id_priv); - } - return ret; -} - -static int cma_bind_listen(struct rdma_id_private *id_priv) -{ - struct rdma_bind_list *bind_list = id_priv->bind_list; - int ret = 0; - - mutex_lock(&lock); - if (bind_list->owners.first->next) - ret = cma_check_port(bind_list, id_priv, 0); - mutex_unlock(&lock); - return ret; -} - -static enum rdma_port_space cma_select_inet_ps( - struct rdma_id_private *id_priv) -{ - switch (id_priv->id.ps) { - case RDMA_PS_TCP: - case RDMA_PS_UDP: - case RDMA_PS_IPOIB: - case RDMA_PS_IB: - return id_priv->id.ps; - default: - - return 0; - } -} - -static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) -{ - enum rdma_port_space ps = 0; - struct sockaddr_ib *sib; - u64 sid_ps, mask, sid; - - sib = (struct sockaddr_ib *) cma_src_addr(id_priv); - mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; - sid = be64_to_cpu(sib->sib_sid) & mask; - - if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { - sid_ps = RDMA_IB_IP_PS_IB; - ps = RDMA_PS_IB; - } else if (((id_priv->id.ps == 
RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && - (sid == (RDMA_IB_IP_PS_TCP & mask))) { - sid_ps = RDMA_IB_IP_PS_TCP; - ps = RDMA_PS_TCP; - } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && - (sid == (RDMA_IB_IP_PS_UDP & mask))) { - sid_ps = RDMA_IB_IP_PS_UDP; - ps = RDMA_PS_UDP; - } - - if (ps) { - sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); - sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | - be64_to_cpu(sib->sib_sid_mask)); - } - return ps; -} - -static int cma_get_port(struct rdma_id_private *id_priv) -{ - enum rdma_port_space ps; - int ret; - - if (cma_family(id_priv) != AF_IB) - ps = cma_select_inet_ps(id_priv); - else - ps = cma_select_ib_ps(id_priv); - if (!ps) - return -EPROTONOSUPPORT; - - mutex_lock(&lock); - if (cma_any_port(cma_src_addr(id_priv))) - ret = cma_alloc_any_port(ps, id_priv); - else - ret = cma_use_port(ps, id_priv); - mutex_unlock(&lock); - - return ret; -} - -static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, - struct sockaddr *addr) -{ -#ifdef INET6 - struct sockaddr_in6 sin6; - - if (addr->sa_family != AF_INET6) - return 0; - - sin6 = *(struct sockaddr_in6 *)addr; - - if (!(IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr))) - return 0; - - if (sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0) - return -EINVAL; - - dev_addr->bound_dev_if = sin6.sin6_scope_id; -#endif - return 0; -} - -int rdma_listen(struct rdma_cm_id *id, int backlog) -{ - struct rdma_id_private *id_priv; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - if (id_priv->state == RDMA_CM_IDLE) { - id->route.addr.src_addr.ss_family = AF_INET; - ret = rdma_bind_addr(id, cma_src_addr(id_priv)); - if (ret) - return ret; - } - - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) - return -EINVAL; - - if (id_priv->reuseaddr) { - ret = cma_bind_listen(id_priv); - if (ret) - goto err; - } - - id_priv->backlog = backlog; - if (id->device) { - if (rdma_cap_ib_cm(id->device, 1)) { - ret = cma_ib_listen(id_priv); - if (ret) - goto err; - } else if (rdma_cap_iw_cm(id->device, 1)) { - ret = cma_iw_listen(id_priv, backlog); - if (ret) - goto err; - } else { - ret = -ENOSYS; - goto err; - } - } else - cma_listen_on_all(id_priv); - - return 0; -err: - id_priv->backlog = 0; - cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); - return ret; -} -EXPORT_SYMBOL(rdma_listen); - -int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) -{ - struct rdma_id_private *id_priv; - int ret; - - if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && - addr->sa_family != AF_IB) - return -EAFNOSUPPORT; - - id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) - return -EINVAL; - - ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); - if (ret) - goto err1; - - memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); - if (!cma_any_addr(addr)) { - ret = cma_translate_addr(addr, &id->route.addr.dev_addr); - if (ret) - goto err1; - - ret = cma_acquire_dev(id_priv, NULL); - if (ret) - goto err1; - } - - if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { - if (addr->sa_family == AF_INET) - id_priv->afonly = 1; -#ifdef INET6 - else if (addr->sa_family == AF_INET6) { - CURVNET_SET_QUIET(id_priv->id.route.addr.dev_addr.net); - id_priv->afonly = V_ip6_v6only; - CURVNET_RESTORE(); - } -#endif - } - ret = cma_get_port(id_priv); - if (ret) - goto err2; - - return 0; -err2: - if (id_priv->cma_dev) - cma_release_dev(id_priv); 
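- /* both error paths fall through to revert the IDLE -> ADDR_BOUND transition made at entry */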
-err1: - cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); - return ret; -} -EXPORT_SYMBOL(rdma_bind_addr); - -static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) -{ - struct cma_hdr *cma_hdr; - - cma_hdr = hdr; - cma_hdr->cma_version = CMA_VERSION; - if (cma_family(id_priv) == AF_INET) { - struct sockaddr_in *src4, *dst4; - - src4 = (struct sockaddr_in *) cma_src_addr(id_priv); - dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); - - cma_set_ip_ver(cma_hdr, 4); - cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; - cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; - cma_hdr->port = src4->sin_port; - } else if (cma_family(id_priv) == AF_INET6) { - struct sockaddr_in6 *src6, *dst6; - - src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); - dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); - - cma_set_ip_ver(cma_hdr, 6); - cma_hdr->src_addr.ip6 = src6->sin6_addr; - cma_hdr->dst_addr.ip6 = dst6->sin6_addr; - cma_hdr->port = src6->sin6_port; - } - return 0; -} - -static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *ib_event) -{ - struct rdma_id_private *id_priv = cm_id->context; - struct rdma_cm_event event; - struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; - int ret = 0; - - mutex_lock(&id_priv->handler_mutex); - if (id_priv->state != RDMA_CM_CONNECT) - goto out; - - memset(&event, 0, sizeof event); - switch (ib_event->event) { - case IB_CM_SIDR_REQ_ERROR: - event.event = RDMA_CM_EVENT_UNREACHABLE; - event.status = -ETIMEDOUT; - break; - case IB_CM_SIDR_REP_RECEIVED: - event.param.ud.private_data = ib_event->private_data; - event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; - if (rep->status != IB_SIDR_SUCCESS) { - event.event = RDMA_CM_EVENT_UNREACHABLE; - event.status = ib_event->param.sidr_rep_rcvd.status; - break; - } - ret = cma_set_qkey(id_priv, rep->qkey); - if (ret) { - event.event = RDMA_CM_EVENT_ADDR_ERROR; - event.status = ret; - break; - } - ret = ib_init_ah_from_path(id_priv->id.device, - id_priv->id.port_num, - id_priv->id.route.path_rec, - &event.param.ud.ah_attr); - if (ret) { - event.event = RDMA_CM_EVENT_ADDR_ERROR; - event.status = ret; - break; - } - event.param.ud.qp_num = rep->qpn; - event.param.ud.qkey = rep->qkey; - event.event = RDMA_CM_EVENT_ESTABLISHED; - event.status = 0; - break; - default: - pr_err("RDMA CMA: unexpected IB CM event: %d\n", - ib_event->event); - goto out; - } - - ret = id_priv->id.event_handler(&id_priv->id, &event); - if (ret) { - /* Destroy the CM ID by returning a non-zero value. 
*/ - id_priv->cm_id.ib = NULL; - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); - return ret; - } -out: - mutex_unlock(&id_priv->handler_mutex); - return ret; -} - -static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, - struct rdma_conn_param *conn_param) -{ - struct ib_cm_sidr_req_param req; - struct ib_cm_id *id; - void *private_data; - int offset, ret; - - memset(&req, 0, sizeof req); - offset = cma_user_data_offset(id_priv); - req.private_data_len = offset + conn_param->private_data_len; - if (req.private_data_len < conn_param->private_data_len) - return -EINVAL; - - if (req.private_data_len) { - private_data = kzalloc(req.private_data_len, GFP_ATOMIC); - if (!private_data) - return -ENOMEM; - } else { - private_data = NULL; - } - - if (conn_param->private_data && conn_param->private_data_len) - memcpy((char *)private_data + offset, conn_param->private_data, - conn_param->private_data_len); - - if (private_data) { - ret = cma_format_hdr(private_data, id_priv); - if (ret) - goto out; - req.private_data = private_data; - } - - id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, - id_priv); - if (IS_ERR(id)) { - ret = PTR_ERR(id); - goto out; - } - id_priv->cm_id.ib = id; - - req.path = id_priv->id.route.path_rec; - req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); - req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); - req.max_cm_retries = CMA_MAX_CM_RETRIES; - - ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); - if (ret) { - ib_destroy_cm_id(id_priv->cm_id.ib); - id_priv->cm_id.ib = NULL; - } -out: - kfree(private_data); - return ret; -} - -static int cma_connect_ib(struct rdma_id_private *id_priv, - struct rdma_conn_param *conn_param) -{ - struct ib_cm_req_param req; - struct rdma_route *route; - void *private_data; - struct ib_cm_id *id; - int offset, ret; - - memset(&req, 0, sizeof req); - offset = cma_user_data_offset(id_priv); - req.private_data_len = offset + conn_param->private_data_len; - if (req.private_data_len < conn_param->private_data_len) - return -EINVAL; - - if (req.private_data_len) { - private_data = kzalloc(req.private_data_len, GFP_ATOMIC); - if (!private_data) - return -ENOMEM; - } else { - private_data = NULL; - } - - if (conn_param->private_data && conn_param->private_data_len) - memcpy((char *)private_data + offset, conn_param->private_data, - conn_param->private_data_len); - - id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); - if (IS_ERR(id)) { - ret = PTR_ERR(id); - goto out; - } - id_priv->cm_id.ib = id; - - route = &id_priv->id.route; - if (private_data) { - ret = cma_format_hdr(private_data, id_priv); - if (ret) - goto out; - req.private_data = private_data; - } - - req.primary_path = &route->path_rec[0]; - if (route->num_paths == 2) - req.alternate_path = &route->path_rec[1]; - - req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); - req.qp_num = id_priv->qp_num; - req.qp_type = id_priv->id.qp_type; - req.starting_psn = id_priv->seq_num; - req.responder_resources = conn_param->responder_resources; - req.initiator_depth = conn_param->initiator_depth; - req.flow_control = conn_param->flow_control; - req.retry_count = min_t(u8, 7, conn_param->retry_count); - req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); - req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; - req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; - req.max_cm_retries = CMA_MAX_CM_RETRIES; - req.srq = id_priv->srq ? 
1 : 0; - - ret = ib_send_cm_req(id_priv->cm_id.ib, &req); -out: - if (ret && !IS_ERR(id)) { - ib_destroy_cm_id(id); - id_priv->cm_id.ib = NULL; - } - - kfree(private_data); - return ret; -} - -static int cma_connect_iw(struct rdma_id_private *id_priv, - struct rdma_conn_param *conn_param) -{ - struct iw_cm_id *cm_id; - int ret; - struct iw_cm_conn_param iw_param; - - cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); - if (IS_ERR(cm_id)) - return PTR_ERR(cm_id); - - cm_id->tos = id_priv->tos; - id_priv->cm_id.iw = cm_id; - - memcpy(&cm_id->local_addr, cma_src_addr(id_priv), - rdma_addr_size(cma_src_addr(id_priv))); - memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), - rdma_addr_size(cma_dst_addr(id_priv))); - - ret = cma_modify_qp_rtr(id_priv, conn_param); - if (ret) - goto out; - - if (conn_param) { - iw_param.ord = conn_param->initiator_depth; - iw_param.ird = conn_param->responder_resources; - iw_param.private_data = conn_param->private_data; - iw_param.private_data_len = conn_param->private_data_len; - iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; - } else { - memset(&iw_param, 0, sizeof iw_param); - iw_param.qpn = id_priv->qp_num; - } - ret = iw_cm_connect(cm_id, &iw_param); -out: - if (ret) { - iw_destroy_cm_id(cm_id); - id_priv->cm_id.iw = NULL; - } - return ret; -} - -int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) -{ - struct rdma_id_private *id_priv; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) - return -EINVAL; - - if (!id->qp) { - id_priv->qp_num = conn_param->qp_num; - id_priv->srq = conn_param->srq; - } - - if (rdma_cap_ib_cm(id->device, id->port_num)) { - if (id->qp_type == IB_QPT_UD) - ret = cma_resolve_ib_udp(id_priv, conn_param); - else - ret = cma_connect_ib(id_priv, conn_param); - } else if (rdma_cap_iw_cm(id->device, id->port_num)) - ret = cma_connect_iw(id_priv, conn_param); - else - ret = -ENOSYS; - if (ret) - goto err; - - return 0; -err: - cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); - return ret; -} -EXPORT_SYMBOL(rdma_connect); - -static int cma_accept_ib(struct rdma_id_private *id_priv, - struct rdma_conn_param *conn_param) -{ - struct ib_cm_rep_param rep; - int ret; - - ret = cma_modify_qp_rtr(id_priv, conn_param); - if (ret) - goto out; - - ret = cma_modify_qp_rts(id_priv, conn_param); - if (ret) - goto out; - - memset(&rep, 0, sizeof rep); - rep.qp_num = id_priv->qp_num; - rep.starting_psn = id_priv->seq_num; - rep.private_data = conn_param->private_data; - rep.private_data_len = conn_param->private_data_len; - rep.responder_resources = conn_param->responder_resources; - rep.initiator_depth = conn_param->initiator_depth; - rep.failover_accepted = 0; - rep.flow_control = conn_param->flow_control; - rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); - rep.srq = id_priv->srq ? 
1 : 0; - - ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); -out: - return ret; -} - -static int cma_accept_iw(struct rdma_id_private *id_priv, - struct rdma_conn_param *conn_param) -{ - struct iw_cm_conn_param iw_param; - int ret; - - ret = cma_modify_qp_rtr(id_priv, conn_param); - if (ret) - return ret; - - iw_param.ord = conn_param->initiator_depth; - iw_param.ird = conn_param->responder_resources; - iw_param.private_data = conn_param->private_data; - iw_param.private_data_len = conn_param->private_data_len; - if (id_priv->id.qp) { - iw_param.qpn = id_priv->qp_num; - } else - iw_param.qpn = conn_param->qp_num; - - return iw_cm_accept(id_priv->cm_id.iw, &iw_param); -} - -static int cma_send_sidr_rep(struct rdma_id_private *id_priv, - enum ib_cm_sidr_status status, u32 qkey, - const void *private_data, int private_data_len) -{ - struct ib_cm_sidr_rep_param rep; - int ret; - - memset(&rep, 0, sizeof rep); - rep.status = status; - if (status == IB_SIDR_SUCCESS) { - ret = cma_set_qkey(id_priv, qkey); - if (ret) - return ret; - rep.qp_num = id_priv->qp_num; - rep.qkey = id_priv->qkey; - } - rep.private_data = private_data; - rep.private_data_len = private_data_len; - - return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); -} - -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) -{ - struct rdma_id_private *id_priv; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - - id_priv->owner = task_pid_nr(current); - - if (!cma_comp(id_priv, RDMA_CM_CONNECT)) - return -EINVAL; - - if (!id->qp && conn_param) { - id_priv->qp_num = conn_param->qp_num; - id_priv->srq = conn_param->srq; - } - - if (rdma_cap_ib_cm(id->device, id->port_num)) { - if (id->qp_type == IB_QPT_UD) { - if (conn_param) - ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, - conn_param->qkey, - conn_param->private_data, - conn_param->private_data_len); - else - ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, - 0, NULL, 0); - } else { - if (conn_param) - ret = cma_accept_ib(id_priv, conn_param); - else - ret = cma_rep_recv(id_priv); - } - } else if (rdma_cap_iw_cm(id->device, id->port_num)) - ret = cma_accept_iw(id_priv, conn_param); - else - ret = -ENOSYS; - - if (ret) - goto reject; - - return 0; -reject: - cma_modify_qp_err(id_priv); - rdma_reject(id, NULL, 0); - return ret; -} -EXPORT_SYMBOL(rdma_accept); - -int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) -{ - struct rdma_id_private *id_priv; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - if (!id_priv->cm_id.ib) - return -EINVAL; - - switch (id->device->node_type) { - case RDMA_NODE_IB_CA: - ret = ib_cm_notify(id_priv->cm_id.ib, event); - break; - default: - ret = 0; - break; - } - return ret; -} -EXPORT_SYMBOL(rdma_notify); - -int rdma_reject(struct rdma_cm_id *id, const void *private_data, - u8 private_data_len) -{ - struct rdma_id_private *id_priv; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - if (!id_priv->cm_id.ib) - return -EINVAL; - - if (rdma_cap_ib_cm(id->device, id->port_num)) { - if (id->qp_type == IB_QPT_UD) - ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, - private_data, private_data_len); - else - ret = ib_send_cm_rej(id_priv->cm_id.ib, - IB_CM_REJ_CONSUMER_DEFINED, NULL, - 0, private_data, private_data_len); - } else if (rdma_cap_iw_cm(id->device, id->port_num)) { - ret = iw_cm_reject(id_priv->cm_id.iw, - private_data, private_data_len); - } else - ret = -ENOSYS; - - return ret; -} -EXPORT_SYMBOL(rdma_reject); - -int rdma_disconnect(struct rdma_cm_id 
*id) -{ - struct rdma_id_private *id_priv; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - if (!id_priv->cm_id.ib) - return -EINVAL; - - if (rdma_cap_ib_cm(id->device, id->port_num)) { - ret = cma_modify_qp_err(id_priv); - if (ret) - goto out; - /* Initiate or respond to a disconnect. */ - if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) - ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); - } else if (rdma_cap_iw_cm(id->device, id->port_num)) { - ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); - } else - ret = -EINVAL; - -out: - return ret; -} -EXPORT_SYMBOL(rdma_disconnect); - -static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) -{ - struct rdma_id_private *id_priv; - struct cma_multicast *mc = multicast->context; - struct rdma_cm_event event; - int ret = 0; - - id_priv = mc->id_priv; - mutex_lock(&id_priv->handler_mutex); - if (id_priv->state != RDMA_CM_ADDR_BOUND && - id_priv->state != RDMA_CM_ADDR_RESOLVED) - goto out; - - if (!status) - status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); - mutex_lock(&id_priv->qp_mutex); - if (!status && id_priv->id.qp) - status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, - be16_to_cpu(multicast->rec.mlid)); - mutex_unlock(&id_priv->qp_mutex); - - memset(&event, 0, sizeof event); - event.status = status; - event.param.ud.private_data = mc->context; - if (!status) { - struct rdma_dev_addr *dev_addr = - &id_priv->id.route.addr.dev_addr; - struct net_device *ndev = - dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - enum ib_gid_type gid_type = - id_priv->cma_dev->default_gid_type[id_priv->id.port_num - - rdma_start_port(id_priv->cma_dev->device)]; - - event.event = RDMA_CM_EVENT_MULTICAST_JOIN; - ib_init_ah_from_mcmember(id_priv->id.device, - id_priv->id.port_num, &multicast->rec, - ndev, gid_type, - &event.param.ud.ah_attr); - event.param.ud.qp_num = 0xFFFFFF; - event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); - if (ndev) - dev_put(ndev); - } else - event.event = RDMA_CM_EVENT_MULTICAST_ERROR; - - ret = id_priv->id.event_handler(&id_priv->id, &event); - if (ret) { - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); - return 0; - } - -out: - mutex_unlock(&id_priv->handler_mutex); - return 0; -} - -static void cma_set_mgid(struct rdma_id_private *id_priv, - struct sockaddr *addr, union ib_gid *mgid) -{ - unsigned char mc_map[MAX_ADDR_LEN]; - struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; - struct sockaddr_in *sin = (struct sockaddr_in *) addr; - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; - - if (cma_any_addr(addr)) { - memset(mgid, 0, sizeof *mgid); - } else if ((addr->sa_family == AF_INET6) && - ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == - 0xFF10A01B)) { - /* IPv6 address is an SA assigned MGID. 
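- * The address bytes already form a valid MGID and are used as-is.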
*/ - memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); - } else if (addr->sa_family == AF_IB) { - memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); - } else if (addr->sa_family == AF_INET6) { - ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); - if (id_priv->id.ps == RDMA_PS_UDP) - mc_map[7] = 0x01; /* Use RDMA CM signature */ - *mgid = *(union ib_gid *) (mc_map + 4); - } else { - ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); - if (id_priv->id.ps == RDMA_PS_UDP) - mc_map[7] = 0x01; /* Use RDMA CM signature */ - *mgid = *(union ib_gid *) (mc_map + 4); - } -} - -static void cma_query_sa_classport_info_cb(int status, - struct ib_class_port_info *rec, - void *context) -{ - struct class_port_info_context *cb_ctx = context; - - WARN_ON(!context); - - if (status || !rec) { - pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", - cb_ctx->device->name, cb_ctx->port_num, status); - goto out; - } - - memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); - -out: - complete(&cb_ctx->done); -} - -static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, - struct ib_class_port_info *class_port_info) -{ - struct class_port_info_context *cb_ctx; - int ret; - - cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); - if (!cb_ctx) - return -ENOMEM; - - cb_ctx->device = device; - cb_ctx->class_port_info = class_port_info; - cb_ctx->port_num = port_num; - init_completion(&cb_ctx->done); - - ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num, - CMA_QUERY_CLASSPORT_INFO_TIMEOUT, - GFP_KERNEL, cma_query_sa_classport_info_cb, - cb_ctx, &cb_ctx->sa_query); - if (ret < 0) { - pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", - device->name, port_num, ret); - goto out; - } - - wait_for_completion(&cb_ctx->done); - -out: - kfree(cb_ctx); - return ret; -} - -static int cma_join_ib_multicast(struct rdma_id_private *id_priv, - struct cma_multicast *mc) -{ - struct ib_sa_mcmember_rec rec; - struct ib_class_port_info class_port_info; - struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; - ib_sa_comp_mask comp_mask; - int ret; - - ib_addr_get_mgid(dev_addr, &rec.mgid); - ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, - &rec.mgid, &rec); - if (ret) - return ret; - - ret = cma_set_qkey(id_priv, 0); - if (ret) - return ret; - - cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); - rec.qkey = cpu_to_be32(id_priv->qkey); - rdma_addr_get_sgid(dev_addr, &rec.port_gid); - rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); - rec.join_state = mc->join_state; - - if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { - ret = cma_query_sa_classport_info(id_priv->id.device, - id_priv->id.port_num, - &class_port_info); - - if (ret) - return ret; - - if (!(ib_get_cpi_capmask2(&class_port_info) & - IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { - pr_warn("RDMA CM: %s port %u Unable to multicast join\n" - "RDMA CM: SM doesn't support Send Only Full Member option\n", - id_priv->id.device->name, id_priv->id.port_num); - return -EOPNOTSUPP; - } - } - - comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | - IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | - IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | - IB_SA_MCMEMBER_REC_FLOW_LABEL | - IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; - - if (id_priv->id.ps == RDMA_PS_IPOIB) - comp_mask |= IB_SA_MCMEMBER_REC_RATE | - IB_SA_MCMEMBER_REC_RATE_SELECTOR | - IB_SA_MCMEMBER_REC_MTU_SELECTOR | - 
IB_SA_MCMEMBER_REC_MTU | - IB_SA_MCMEMBER_REC_HOP_LIMIT; - - mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, - id_priv->id.port_num, &rec, - comp_mask, GFP_KERNEL, - cma_ib_mc_handler, mc); - return PTR_ERR_OR_ZERO(mc->multicast.ib); -} - -static void iboe_mcast_work_handler(struct work_struct *work) -{ - struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); - struct cma_multicast *mc = mw->mc; - struct ib_sa_multicast *m = mc->multicast.ib; - - mc->multicast.ib->context = mc; - cma_ib_mc_handler(0, m); - kref_put(&mc->mcref, release_mc); - kfree(mw); -} - -static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) -{ - struct sockaddr_in *sin = (struct sockaddr_in *)addr; - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; - - if (cma_any_addr(addr)) { - memset(mgid, 0, sizeof *mgid); - } else if (addr->sa_family == AF_INET6) { - memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); - } else { - mgid->raw[0] = 0xff; - mgid->raw[1] = 0x0e; - mgid->raw[2] = 0; - mgid->raw[3] = 0; - mgid->raw[4] = 0; - mgid->raw[5] = 0; - mgid->raw[6] = 0; - mgid->raw[7] = 0; - mgid->raw[8] = 0; - mgid->raw[9] = 0; - mgid->raw[10] = 0xff; - mgid->raw[11] = 0xff; - *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; - } -} - -static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, - struct cma_multicast *mc) -{ - struct iboe_mcast_work *work; - struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; - int err = 0; - struct sockaddr *addr = (struct sockaddr *)&mc->addr; - struct net_device *ndev = NULL; - enum ib_gid_type gid_type; - bool send_only; - - send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); - - if (cma_zero_addr((struct sockaddr *)&mc->addr)) - return -EINVAL; - - work = kzalloc(sizeof *work, GFP_KERNEL); - if (!work) - return -ENOMEM; - - mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); - if (!mc->multicast.ib) { - err = -ENOMEM; - goto out1; - } - - cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid); - - mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); - if (id_priv->id.ps == RDMA_PS_UDP) - mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); - - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (!ndev) { - err = -ENODEV; - goto out2; - } - mc->multicast.ib->rec.rate = iboe_get_rate(ndev); - mc->multicast.ib->rec.hop_limit = 1; - mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu); - - gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - - rdma_start_port(id_priv->cma_dev->device)]; - if (addr->sa_family == AF_INET) { - if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { - mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; - if (!send_only) { - mc->igmp_joined = true; - } - } - } else { - if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) - err = -ENOTSUPP; - } - dev_put(ndev); - if (err || !mc->multicast.ib->rec.mtu) { - if (!err) - err = -EINVAL; - goto out2; - } - rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, - &mc->multicast.ib->rec.port_gid); - work->id = id_priv; - work->mc = mc; - INIT_WORK(&work->work, iboe_mcast_work_handler); - kref_get(&mc->mcref); - queue_work(cma_wq, &work->work); - - return 0; - -out2: - kfree(mc->multicast.ib); -out1: - kfree(work); - return err; -} - -int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, - u8 join_state, void *context) -{ - struct rdma_id_private *id_priv; - struct cma_multicast *mc; - int ret; - - id_priv = container_of(id, 
struct rdma_id_private, id); - if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && - !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) - return -EINVAL; - - mc = kmalloc(sizeof *mc, GFP_KERNEL); - if (!mc) - return -ENOMEM; - - memcpy(&mc->addr, addr, rdma_addr_size(addr)); - mc->context = context; - mc->id_priv = id_priv; - mc->igmp_joined = false; - mc->join_state = join_state; - spin_lock(&id_priv->lock); - list_add(&mc->list, &id_priv->mc_list); - spin_unlock(&id_priv->lock); - - if (rdma_protocol_roce(id->device, id->port_num)) { - kref_init(&mc->mcref); - ret = cma_iboe_join_multicast(id_priv, mc); - } else if (rdma_cap_ib_mcast(id->device, id->port_num)) - ret = cma_join_ib_multicast(id_priv, mc); - else - ret = -ENOSYS; - - if (ret) { - spin_lock_irq(&id_priv->lock); - list_del(&mc->list); - spin_unlock_irq(&id_priv->lock); - kfree(mc); - } - return ret; -} -EXPORT_SYMBOL(rdma_join_multicast); - -void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) -{ - struct rdma_id_private *id_priv; - struct cma_multicast *mc; - - id_priv = container_of(id, struct rdma_id_private, id); - spin_lock_irq(&id_priv->lock); - list_for_each_entry(mc, &id_priv->mc_list, list) { - if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) { - list_del(&mc->list); - spin_unlock_irq(&id_priv->lock); - - if (id->qp) - ib_detach_mcast(id->qp, - &mc->multicast.ib->rec.mgid, - be16_to_cpu(mc->multicast.ib->rec.mlid)); - - BUG_ON(id_priv->cma_dev->device != id->device); - - if (rdma_cap_ib_mcast(id->device, id->port_num)) { - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); - } else if (rdma_protocol_roce(id->device, id->port_num)) { - if (mc->igmp_joined) { - struct rdma_dev_addr *dev_addr = - &id->route.addr.dev_addr; - struct net_device *ndev = NULL; - - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(dev_addr->net, - dev_addr->bound_dev_if); - if (ndev) { - dev_put(ndev); - } - mc->igmp_joined = false; - } - kref_put(&mc->mcref, release_mc); - } - return; - } - } - spin_unlock_irq(&id_priv->lock); -} -EXPORT_SYMBOL(rdma_leave_multicast); - -static int -sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS) -{ - struct cma_device *cma_dev = arg1; - const int port = arg2; - char buf[64]; - int error; - - strlcpy(buf, ib_cache_gid_type_str( - cma_get_default_gid_type(cma_dev, port)), sizeof(buf)); - - error = sysctl_handle_string(oidp, buf, sizeof(buf), req); - if (error != 0 || req->newptr == NULL) - goto done; - - error = ib_cache_gid_parse_type_str(buf); - if (error < 0) { - error = EINVAL; - goto done; - } - - cma_set_default_gid_type(cma_dev, port, error); - error = 0; -done: - return (error); -} - -static void cma_add_one(struct ib_device *device) -{ - struct cma_device *cma_dev; - struct rdma_id_private *id_priv; - unsigned int i; - unsigned long supported_gids = 0; - - cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); - if (!cma_dev) - return; - - sysctl_ctx_init(&cma_dev->sysctl_ctx); - - cma_dev->device = device; - cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, - sizeof(*cma_dev->default_gid_type), - GFP_KERNEL); - if (!cma_dev->default_gid_type) { - kfree(cma_dev); - return; - } - for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { - supported_gids = roce_gid_type_mask_support(device, i); - WARN_ON(!supported_gids); - cma_dev->default_gid_type[i - rdma_start_port(device)] = - find_first_bit(&supported_gids, BITS_PER_LONG); - } - - init_completion(&cma_dev->comp); - atomic_set(&cma_dev->refcount, 1); - INIT_LIST_HEAD(&cma_dev->id_list); - ib_set_client_data(device, 
&cma_client, cma_dev); - - mutex_lock(&lock); - list_add_tail(&cma_dev->list, &dev_list); - list_for_each_entry(id_priv, &listen_any_list, list) - cma_listen_on_dev(id_priv, cma_dev); - mutex_unlock(&lock); - - for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { - char buf[64]; - - snprintf(buf, sizeof(buf), "default_roce_mode_port%d", i); - - (void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx, - SYSCTL_CHILDREN(device->ports_parent->parent->oidp), - OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, - cma_dev, i, &sysctl_cma_default_roce_mode, "A", - "Default RoCE mode. Valid values: IB/RoCE v1 and RoCE v2"); - } -} - -static int cma_remove_id_dev(struct rdma_id_private *id_priv) -{ - struct rdma_cm_event event; - enum rdma_cm_state state; - int ret = 0; - - /* Record that we want to remove the device */ - state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); - if (state == RDMA_CM_DESTROYING) - return 0; - - cma_cancel_operation(id_priv, state); - mutex_lock(&id_priv->handler_mutex); - - /* Check for destruction from another callback. */ - if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) - goto out; - - memset(&event, 0, sizeof event); - event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; - ret = id_priv->id.event_handler(&id_priv->id, &event); -out: - mutex_unlock(&id_priv->handler_mutex); - return ret; -} - -static void cma_process_remove(struct cma_device *cma_dev) -{ - struct rdma_id_private *id_priv; - int ret; - - mutex_lock(&lock); - while (!list_empty(&cma_dev->id_list)) { - id_priv = list_entry(cma_dev->id_list.next, - struct rdma_id_private, list); - - list_del(&id_priv->listen_list); - list_del_init(&id_priv->list); - atomic_inc(&id_priv->refcount); - mutex_unlock(&lock); - - ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv); - cma_deref_id(id_priv); - if (ret) - rdma_destroy_id(&id_priv->id); - - mutex_lock(&lock); - } - mutex_unlock(&lock); - - cma_deref_dev(cma_dev); - wait_for_completion(&cma_dev->comp); -} - -static void cma_remove_one(struct ib_device *device, void *client_data) -{ - struct cma_device *cma_dev = client_data; - - if (!cma_dev) - return; - - mutex_lock(&lock); - list_del(&cma_dev->list); - mutex_unlock(&lock); - - cma_process_remove(cma_dev); - sysctl_ctx_free(&cma_dev->sysctl_ctx); - kfree(cma_dev->default_gid_type); - kfree(cma_dev); -} - -static void cma_init_vnet(void *arg) -{ - struct cma_pernet *pernet = &VNET(cma_pernet); - - idr_init(&pernet->tcp_ps); - idr_init(&pernet->udp_ps); - idr_init(&pernet->ipoib_ps); - idr_init(&pernet->ib_ps); -} -VNET_SYSINIT(cma_init_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_FIRST, cma_init_vnet, NULL); - -static void cma_destroy_vnet(void *arg) -{ - struct cma_pernet *pernet = &VNET(cma_pernet); - - idr_destroy(&pernet->tcp_ps); - idr_destroy(&pernet->udp_ps); - idr_destroy(&pernet->ipoib_ps); - idr_destroy(&pernet->ib_ps); -} -VNET_SYSUNINIT(cma_destroy_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_SECOND, cma_destroy_vnet, NULL); - -static int __init cma_init(void) -{ - int ret; - - cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); - if (!cma_wq) - return -ENOMEM; - - ib_sa_register_client(&sa_client); - rdma_addr_register_client(&addr_client); - - ret = ib_register_client(&cma_client); - if (ret) - goto err; - - cma_configfs_init(); - - return 0; - -err: - rdma_addr_unregister_client(&addr_client); - ib_sa_unregister_client(&sa_client); - destroy_workqueue(cma_wq); - return ret; -} - -static void __exit cma_cleanup(void) -{ - cma_configfs_exit(); - ib_unregister_client(&cma_client); - 
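- /* the remaining teardown mirrors cma_init() in reverse order */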
rdma_addr_unregister_client(&addr_client); - ib_sa_unregister_client(&sa_client); - destroy_workqueue(cma_wq); -} - -module_init(cma_init); -module_exit(cma_cleanup); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/cma.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/fmr_pool.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/fmr_pool.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/fmr_pool.c (nonexistent) @@ -1,520 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include - -#include "core_priv.h" - -#define PFX "fmr_pool: " - -enum { - IB_FMR_MAX_REMAPS = 32, - - IB_FMR_HASH_BITS = 8, - IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS, - IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1 -}; - -/* - * If an FMR is not in use, then the list member will point to either - * its pool's free_list (if the FMR can be mapped again; that is, - * remap_count < pool->max_remaps) or its pool's dirty_list (if the - * FMR needs to be unmapped before being remapped). In either of - * these cases it is a bug if the ref_count is not 0. In other words, - * if ref_count is > 0, then the list member must not be linked into - * either free_list or dirty_list. - * - * The cache_node member is used to link the FMR into a cache bucket - * (if caching is enabled). This is independent of the reference - * count of the FMR. When a valid FMR is released, its ref_count is - * decremented, and if ref_count reaches 0, the FMR is placed in - * either free_list or dirty_list as appropriate. However, it is not - * removed from the cache and may be "revived" if a call to - * ib_fmr_register_physical() occurs before the FMR is remapped. In - * this case we just increment the ref_count and remove the FMR from - * free_list/dirty_list. 
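- *
- * A cache hit therefore revives an FMR with its mapping intact and
- * avoids an unmap/remap cycle entirely.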
- * - * Before we remap an FMR from free_list, we remove it from the cache - * (to prevent another user from obtaining a stale FMR). When an FMR - * is released, we add it to the tail of the free list, so that our - * cache eviction policy is "least recently used." - * - * All manipulation of ref_count, list and cache_node is protected by - * pool_lock to maintain consistency. - */ - -struct ib_fmr_pool { - spinlock_t pool_lock; - - int pool_size; - int max_pages; - int max_remaps; - int dirty_watermark; - int dirty_len; - struct list_head free_list; - struct list_head dirty_list; - struct hlist_head *cache_bucket; - - void (*flush_function)(struct ib_fmr_pool *pool, - void * arg); - void *flush_arg; - - struct task_struct *thread; - - atomic_t req_ser; - atomic_t flush_ser; - - wait_queue_head_t force_wait; -}; - -static inline u32 ib_fmr_hash(u64 first_page) -{ - return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) & - (IB_FMR_HASH_SIZE - 1); -} - -/* Caller must hold pool_lock */ -static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, - u64 *page_list, - int page_list_len, - u64 io_virtual_address) -{ - struct hlist_head *bucket; - struct ib_pool_fmr *fmr; - - if (!pool->cache_bucket) - return NULL; - - bucket = pool->cache_bucket + ib_fmr_hash(*page_list); - - hlist_for_each_entry(fmr, bucket, cache_node) - if (io_virtual_address == fmr->io_virtual_address && - page_list_len == fmr->page_list_len && - !memcmp(page_list, fmr->page_list, - page_list_len * sizeof *page_list)) - return fmr; - - return NULL; -} - -static void ib_fmr_batch_release(struct ib_fmr_pool *pool) -{ - int ret; - struct ib_pool_fmr *fmr; - LIST_HEAD(unmap_list); - LIST_HEAD(fmr_list); - - spin_lock_irq(&pool->pool_lock); - - list_for_each_entry(fmr, &pool->dirty_list, list) { - hlist_del_init(&fmr->cache_node); - fmr->remap_count = 0; - list_add_tail(&fmr->fmr->list, &fmr_list); - -#ifdef DEBUG - if (fmr->ref_count != 0) { - pr_warn(PFX "Unmapping FMR %p with ref count %d\n", - fmr, fmr->ref_count); - } -#endif - } - - list_splice_init(&pool->dirty_list, &unmap_list); - pool->dirty_len = 0; - - spin_unlock_irq(&pool->pool_lock); - - if (list_empty(&unmap_list)) { - return; - } - - ret = ib_unmap_fmr(&fmr_list); - if (ret) - pr_warn(PFX "ib_unmap_fmr returned %d\n", ret); - - spin_lock_irq(&pool->pool_lock); - list_splice(&unmap_list, &pool->free_list); - spin_unlock_irq(&pool->pool_lock); -} - -static int ib_fmr_cleanup_thread(void *pool_ptr) -{ - struct ib_fmr_pool *pool = pool_ptr; - - do { - if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) { - ib_fmr_batch_release(pool); - - atomic_inc(&pool->flush_ser); - wake_up_interruptible(&pool->force_wait); - - if (pool->flush_function) - pool->flush_function(pool, pool->flush_arg); - } - - set_current_state(TASK_INTERRUPTIBLE); - if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 && - !kthread_should_stop()) - schedule(); - __set_current_state(TASK_RUNNING); - } while (!kthread_should_stop()); - - return 0; -} - -/** - * ib_create_fmr_pool - Create an FMR pool - * @pd:Protection domain for FMRs - * @params:FMR pool parameters - * - * Create a pool of FMRs. Return value is pointer to new pool or - * error code if creation failed. 
- */ -struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, - struct ib_fmr_pool_param *params) -{ - struct ib_device *device; - struct ib_fmr_pool *pool; - int i; - int ret; - int max_remaps; - - if (!params) - return ERR_PTR(-EINVAL); - - device = pd->device; - if (!device->alloc_fmr || !device->dealloc_fmr || - !device->map_phys_fmr || !device->unmap_fmr) { - pr_info(PFX "Device %s does not support FMRs\n", device->name); - return ERR_PTR(-ENOSYS); - } - - if (!device->attrs.max_map_per_fmr) - max_remaps = IB_FMR_MAX_REMAPS; - else - max_remaps = device->attrs.max_map_per_fmr; - - pool = kmalloc(sizeof *pool, GFP_KERNEL); - if (!pool) - return ERR_PTR(-ENOMEM); - - pool->cache_bucket = NULL; - pool->flush_function = params->flush_function; - pool->flush_arg = params->flush_arg; - - INIT_LIST_HEAD(&pool->free_list); - INIT_LIST_HEAD(&pool->dirty_list); - - if (params->cache) { - pool->cache_bucket = - kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, - GFP_KERNEL); - if (!pool->cache_bucket) { - pr_warn(PFX "Failed to allocate cache in pool\n"); - ret = -ENOMEM; - goto out_free_pool; - } - - for (i = 0; i < IB_FMR_HASH_SIZE; ++i) - INIT_HLIST_HEAD(pool->cache_bucket + i); - } - - pool->pool_size = 0; - pool->max_pages = params->max_pages_per_fmr; - pool->max_remaps = max_remaps; - pool->dirty_watermark = params->dirty_watermark; - pool->dirty_len = 0; - spin_lock_init(&pool->pool_lock); - atomic_set(&pool->req_ser, 0); - atomic_set(&pool->flush_ser, 0); - init_waitqueue_head(&pool->force_wait); - - pool->thread = kthread_run(ib_fmr_cleanup_thread, - pool, - "ib_fmr(%s)", - device->name); - if (IS_ERR(pool->thread)) { - pr_warn(PFX "couldn't start cleanup thread\n"); - ret = PTR_ERR(pool->thread); - goto out_free_pool; - } - - { - struct ib_pool_fmr *fmr; - struct ib_fmr_attr fmr_attr = { - .max_pages = params->max_pages_per_fmr, - .max_maps = pool->max_remaps, - .page_shift = params->page_shift - }; - int bytes_per_fmr = sizeof *fmr; - - if (pool->cache_bucket) - bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64); - - for (i = 0; i < params->pool_size; ++i) { - fmr = kmalloc(bytes_per_fmr, GFP_KERNEL); - if (!fmr) - goto out_fail; - - fmr->pool = pool; - fmr->remap_count = 0; - fmr->ref_count = 0; - INIT_HLIST_NODE(&fmr->cache_node); - - fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr); - if (IS_ERR(fmr->fmr)) { - pr_warn(PFX "fmr_create failed for FMR %d\n", - i); - kfree(fmr); - goto out_fail; - } - - list_add_tail(&fmr->list, &pool->free_list); - ++pool->pool_size; - } - } - - return pool; - - out_free_pool: - kfree(pool->cache_bucket); - kfree(pool); - - return ERR_PTR(ret); - - out_fail: - ib_destroy_fmr_pool(pool); - - return ERR_PTR(-ENOMEM); -} -EXPORT_SYMBOL(ib_create_fmr_pool); - -/** - * ib_destroy_fmr_pool - Free FMR pool - * @pool:FMR pool to free - * - * Destroy an FMR pool and free all associated resources. 
- */ -void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) -{ - struct ib_pool_fmr *fmr; - struct ib_pool_fmr *tmp; - LIST_HEAD(fmr_list); - int i; - - kthread_stop(pool->thread); - ib_fmr_batch_release(pool); - - i = 0; - list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { - if (fmr->remap_count) { - INIT_LIST_HEAD(&fmr_list); - list_add_tail(&fmr->fmr->list, &fmr_list); - ib_unmap_fmr(&fmr_list); - } - ib_dealloc_fmr(fmr->fmr); - list_del(&fmr->list); - kfree(fmr); - ++i; - } - - if (i < pool->pool_size) - pr_warn(PFX "pool still has %d regions registered\n", - pool->pool_size - i); - - kfree(pool->cache_bucket); - kfree(pool); -} -EXPORT_SYMBOL(ib_destroy_fmr_pool); - -/** - * ib_flush_fmr_pool - Invalidate all unmapped FMRs - * @pool:FMR pool to flush - * - * Ensure that all unmapped FMRs are fully invalidated. - */ -int ib_flush_fmr_pool(struct ib_fmr_pool *pool) -{ - int serial; - struct ib_pool_fmr *fmr, *next; - - /* - * The free_list holds FMRs that may have been used - * but have not been remapped enough times to be dirty. - * Put them on the dirty list now so that the cleanup - * thread will reap them too. - */ - spin_lock_irq(&pool->pool_lock); - list_for_each_entry_safe(fmr, next, &pool->free_list, list) { - if (fmr->remap_count > 0) - list_move(&fmr->list, &pool->dirty_list); - } - spin_unlock_irq(&pool->pool_lock); - - serial = atomic_inc_return(&pool->req_ser); - wake_up_process(pool->thread); - - if (wait_event_interruptible(pool->force_wait, - atomic_read(&pool->flush_ser) - serial >= 0)) - return -EINTR; - - return 0; -} -EXPORT_SYMBOL(ib_flush_fmr_pool); - -/** - * ib_fmr_pool_map_phys - - * @pool:FMR pool to allocate FMR from - * @page_list:List of pages to map - * @list_len:Number of pages in @page_list - * @io_virtual_address:I/O virtual address for new FMR - * - * Map an FMR from an FMR pool. 
- */ -struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, - u64 *page_list, - int list_len, - u64 io_virtual_address) -{ - struct ib_fmr_pool *pool = pool_handle; - struct ib_pool_fmr *fmr; - unsigned long flags; - int result; - - if (list_len < 1 || list_len > pool->max_pages) - return ERR_PTR(-EINVAL); - - spin_lock_irqsave(&pool->pool_lock, flags); - fmr = ib_fmr_cache_lookup(pool, - page_list, - list_len, - io_virtual_address); - if (fmr) { - /* found in cache */ - ++fmr->ref_count; - if (fmr->ref_count == 1) { - list_del(&fmr->list); - } - - spin_unlock_irqrestore(&pool->pool_lock, flags); - - return fmr; - } - - if (list_empty(&pool->free_list)) { - spin_unlock_irqrestore(&pool->pool_lock, flags); - return ERR_PTR(-EAGAIN); - } - - fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list); - list_del(&fmr->list); - hlist_del_init(&fmr->cache_node); - spin_unlock_irqrestore(&pool->pool_lock, flags); - - result = ib_map_phys_fmr(fmr->fmr, page_list, list_len, - io_virtual_address); - - if (result) { - spin_lock_irqsave(&pool->pool_lock, flags); - list_add(&fmr->list, &pool->free_list); - spin_unlock_irqrestore(&pool->pool_lock, flags); - - pr_warn(PFX "fmr_map returns %d\n", result); - - return ERR_PTR(result); - } - - ++fmr->remap_count; - fmr->ref_count = 1; - - if (pool->cache_bucket) { - fmr->io_virtual_address = io_virtual_address; - fmr->page_list_len = list_len; - memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list)); - - spin_lock_irqsave(&pool->pool_lock, flags); - hlist_add_head(&fmr->cache_node, - pool->cache_bucket + ib_fmr_hash(fmr->page_list[0])); - spin_unlock_irqrestore(&pool->pool_lock, flags); - } - - return fmr; -} -EXPORT_SYMBOL(ib_fmr_pool_map_phys); - -/** - * ib_fmr_pool_unmap - Unmap FMR - * @fmr:FMR to unmap - * - * Unmap an FMR. The FMR mapping may remain valid until the FMR is - * reused (or until ib_flush_fmr_pool() is called). - */ -int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) -{ - struct ib_fmr_pool *pool; - unsigned long flags; - - pool = fmr->pool; - - spin_lock_irqsave(&pool->pool_lock, flags); - - --fmr->ref_count; - if (!fmr->ref_count) { - if (fmr->remap_count < pool->max_remaps) { - list_add_tail(&fmr->list, &pool->free_list); - } else { - list_add_tail(&fmr->list, &pool->dirty_list); - if (++pool->dirty_len >= pool->dirty_watermark) { - atomic_inc(&pool->req_ser); - wake_up_process(pool->thread); - } - } - } - -#ifdef DEBUG - if (fmr->ref_count < 0) - pr_warn(PFX "FMR %p has ref count %d < 0\n", - fmr, fmr->ref_count); -#endif - - spin_unlock_irqrestore(&pool->pool_lock, flags); - - return 0; -} -EXPORT_SYMBOL(ib_fmr_pool_unmap); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/fmr_pool.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/device.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/device.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/device.c (nonexistent) @@ -1,1025 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "core_priv.h" - -MODULE_AUTHOR("Roland Dreier"); -MODULE_DESCRIPTION("core kernel InfiniBand API"); -MODULE_LICENSE("Dual BSD/GPL"); - -struct ib_client_data { - struct list_head list; - struct ib_client *client; - void * data; - /* The device or client is going down. Do not call client or device - * callbacks other than remove(). */ - bool going_down; -}; - -struct workqueue_struct *ib_comp_wq; -struct workqueue_struct *ib_wq; -EXPORT_SYMBOL_GPL(ib_wq); - -/* The device_list and client_list contain devices and clients after their - * registration has completed, and the devices and clients are removed - * during unregistration. */ -static LIST_HEAD(device_list); -static LIST_HEAD(client_list); - -/* - * device_mutex and lists_rwsem protect access to both device_list and - * client_list. device_mutex protects writer access by device and client - * registration / de-registration. lists_rwsem protects reader access to - * these lists. Iterators of these lists must lock it for read, while updates - * to the lists must be done with a write lock. A special case is when the - * device_mutex is locked. In this case locking the lists for read access is - * not necessary as the device_mutex implies it. - * - * lists_rwsem also protects access to the client data list. 
- */ -static DEFINE_MUTEX(device_mutex); -static DECLARE_RWSEM(lists_rwsem); - - -static int ib_device_check_mandatory(struct ib_device *device) -{ -#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x } - static const struct { - size_t offset; - char *name; - } mandatory_table[] = { - IB_MANDATORY_FUNC(query_device), - IB_MANDATORY_FUNC(query_port), - IB_MANDATORY_FUNC(query_pkey), - IB_MANDATORY_FUNC(query_gid), - IB_MANDATORY_FUNC(alloc_pd), - IB_MANDATORY_FUNC(dealloc_pd), - IB_MANDATORY_FUNC(create_ah), - IB_MANDATORY_FUNC(destroy_ah), - IB_MANDATORY_FUNC(create_qp), - IB_MANDATORY_FUNC(modify_qp), - IB_MANDATORY_FUNC(destroy_qp), - IB_MANDATORY_FUNC(post_send), - IB_MANDATORY_FUNC(post_recv), - IB_MANDATORY_FUNC(create_cq), - IB_MANDATORY_FUNC(destroy_cq), - IB_MANDATORY_FUNC(poll_cq), - IB_MANDATORY_FUNC(req_notify_cq), - IB_MANDATORY_FUNC(get_dma_mr), - IB_MANDATORY_FUNC(dereg_mr), - IB_MANDATORY_FUNC(get_port_immutable) - }; - int i; - - for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { - if (!*(void **) ((char *) device + mandatory_table[i].offset)) { - pr_warn("Device %s is missing mandatory function %s\n", - device->name, mandatory_table[i].name); - return -EINVAL; - } - } - - return 0; -} - -static struct ib_device *__ib_device_get_by_name(const char *name) -{ - struct ib_device *device; - - list_for_each_entry(device, &device_list, core_list) - if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX)) - return device; - - return NULL; -} - - -static int alloc_name(char *name) -{ - unsigned long *inuse; - char buf[IB_DEVICE_NAME_MAX]; - struct ib_device *device; - int i; - - inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!inuse) - return -ENOMEM; - - list_for_each_entry(device, &device_list, core_list) { - if (!sscanf(device->name, name, &i)) - continue; - if (i < 0 || i >= PAGE_SIZE * 8) - continue; - snprintf(buf, sizeof buf, name, i); - if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX)) - set_bit(i, inuse); - } - - i = find_first_zero_bit(inuse, PAGE_SIZE * 8); - free_page((unsigned long) inuse); - snprintf(buf, sizeof buf, name, i); - - if (__ib_device_get_by_name(buf)) - return -ENFILE; - - strlcpy(name, buf, IB_DEVICE_NAME_MAX); - return 0; -} - -static void ib_device_release(struct device *device) -{ - struct ib_device *dev = container_of(device, struct ib_device, dev); - - ib_cache_release_one(dev); - kfree(dev->port_immutable); - kfree(dev); -} - -static struct class ib_class = { - .name = "infiniband", - .dev_release = ib_device_release, -}; - -/** - * ib_alloc_device - allocate an IB device struct - * @size:size of structure to allocate - * - * Low-level drivers should use ib_alloc_device() to allocate &struct - * ib_device. @size is the size of the structure to be allocated, - * including any private data used by the low-level driver. - * ib_dealloc_device() must be used to free structures allocated with - * ib_alloc_device(). 
- */ -struct ib_device *ib_alloc_device(size_t size) -{ - struct ib_device *device; - - if (WARN_ON(size < sizeof(struct ib_device))) - return NULL; - - device = kzalloc(size, GFP_KERNEL); - if (!device) - return NULL; - - device->dev.parent = &linux_root_device; - device->dev.class = &ib_class; - device_initialize(&device->dev); - - dev_set_drvdata(&device->dev, device); - - INIT_LIST_HEAD(&device->event_handler_list); - spin_lock_init(&device->event_handler_lock); - spin_lock_init(&device->client_data_lock); - INIT_LIST_HEAD(&device->client_data_list); - INIT_LIST_HEAD(&device->port_list); - - return device; -} -EXPORT_SYMBOL(ib_alloc_device); - -/** - * ib_dealloc_device - free an IB device struct - * @device:structure to free - * - * Free a structure allocated with ib_alloc_device(). - */ -void ib_dealloc_device(struct ib_device *device) -{ - WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && - device->reg_state != IB_DEV_UNINITIALIZED); - kobject_put(&device->dev.kobj); -} -EXPORT_SYMBOL(ib_dealloc_device); - -static int add_client_context(struct ib_device *device, struct ib_client *client) -{ - struct ib_client_data *context; - unsigned long flags; - - context = kmalloc(sizeof *context, GFP_KERNEL); - if (!context) { - pr_warn("Couldn't allocate client context for %s/%s\n", - device->name, client->name); - return -ENOMEM; - } - - context->client = client; - context->data = NULL; - context->going_down = false; - - down_write(&lists_rwsem); - spin_lock_irqsave(&device->client_data_lock, flags); - list_add(&context->list, &device->client_data_list); - spin_unlock_irqrestore(&device->client_data_lock, flags); - up_write(&lists_rwsem); - - return 0; -} - -static int verify_immutable(const struct ib_device *dev, u8 port) -{ - return WARN_ON(!rdma_cap_ib_mad(dev, port) && - rdma_max_mad_size(dev, port) != 0); -} - -static int read_port_immutable(struct ib_device *device) -{ - int ret; - u8 start_port = rdma_start_port(device); - u8 end_port = rdma_end_port(device); - u8 port; - - /** - * device->port_immutable is indexed directly by the port number to make - * access to this data as efficient as possible. - * - * Therefore port_immutable is declared as a 1 based array with - * potential empty slots at the beginning. - */ - device->port_immutable = kzalloc(sizeof(*device->port_immutable) - * (end_port + 1), - GFP_KERNEL); - if (!device->port_immutable) - return -ENOMEM; - - for (port = start_port; port <= end_port; ++port) { - ret = device->get_port_immutable(device, port, - &device->port_immutable[port]); - if (ret) - return ret; - - if (verify_immutable(device, port)) - return -EINVAL; - } - return 0; -} - -void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len) -{ - if (dev->get_dev_fw_str) - dev->get_dev_fw_str(dev, str, str_len); - else - str[0] = '\0'; -} -EXPORT_SYMBOL(ib_get_device_fw_str); - -/** - * ib_register_device - Register an IB device with IB core - * @device:Device to register - * - * Low-level drivers use ib_register_device() to register their - * devices with the IB core. All registered clients will receive a - * callback for each device that is added. @device must be allocated - * with ib_alloc_device(). 
- */ -int ib_register_device(struct ib_device *device, - int (*port_callback)(struct ib_device *, - u8, struct kobject *)) -{ - int ret; - struct ib_client *client; - struct ib_udata uhw = {.outlen = 0, .inlen = 0}; - - mutex_lock(&device_mutex); - - if (strchr(device->name, '%')) { - ret = alloc_name(device->name); - if (ret) - goto out; - } - - if (ib_device_check_mandatory(device)) { - ret = -EINVAL; - goto out; - } - - ret = read_port_immutable(device); - if (ret) { - pr_warn("Couldn't create per port immutable data %s\n", - device->name); - goto out; - } - - ret = ib_cache_setup_one(device); - if (ret) { - pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n"); - goto out; - } - - memset(&device->attrs, 0, sizeof(device->attrs)); - ret = device->query_device(device, &device->attrs, &uhw); - if (ret) { - pr_warn("Couldn't query the device attributes\n"); - ib_cache_cleanup_one(device); - goto out; - } - - ret = ib_device_register_sysfs(device, port_callback); - if (ret) { - pr_warn("Couldn't register device %s with driver model\n", - device->name); - ib_cache_cleanup_one(device); - goto out; - } - - device->reg_state = IB_DEV_REGISTERED; - - list_for_each_entry(client, &client_list, list) - if (client->add && !add_client_context(device, client)) - client->add(device); - - down_write(&lists_rwsem); - list_add_tail(&device->core_list, &device_list); - up_write(&lists_rwsem); -out: - mutex_unlock(&device_mutex); - return ret; -} -EXPORT_SYMBOL(ib_register_device); - -/** - * ib_unregister_device - Unregister an IB device - * @device:Device to unregister - * - * Unregister an IB device. All clients will receive a remove callback. - */ -void ib_unregister_device(struct ib_device *device) -{ - struct ib_client_data *context, *tmp; - unsigned long flags; - - mutex_lock(&device_mutex); - - down_write(&lists_rwsem); - list_del(&device->core_list); - spin_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry_safe(context, tmp, &device->client_data_list, list) - context->going_down = true; - spin_unlock_irqrestore(&device->client_data_lock, flags); - downgrade_write(&lists_rwsem); - - list_for_each_entry_safe(context, tmp, &device->client_data_list, - list) { - if (context->client->remove) - context->client->remove(device, context->data); - } - up_read(&lists_rwsem); - - mutex_unlock(&device_mutex); - - ib_device_unregister_sysfs(device); - ib_cache_cleanup_one(device); - - down_write(&lists_rwsem); - spin_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry_safe(context, tmp, &device->client_data_list, list) - kfree(context); - spin_unlock_irqrestore(&device->client_data_lock, flags); - up_write(&lists_rwsem); - - device->reg_state = IB_DEV_UNREGISTERED; -} -EXPORT_SYMBOL(ib_unregister_device); - -/** - * ib_register_client - Register an IB client - * @client:Client to register - * - * Upper level users of the IB drivers can use ib_register_client() to - * register callbacks for IB device addition and removal. When an IB - * device is added, each registered client's add method will be called - * (in the order the clients were registered), and when a device is - * removed, each client's remove method will be called (in the reverse - * order that clients were registered). In addition, when - * ib_register_client() is called, the client will receive an add - * callback for all devices already registered. 
- */ -int ib_register_client(struct ib_client *client) -{ - struct ib_device *device; - - mutex_lock(&device_mutex); - - list_for_each_entry(device, &device_list, core_list) - if (client->add && !add_client_context(device, client)) - client->add(device); - - down_write(&lists_rwsem); - list_add_tail(&client->list, &client_list); - up_write(&lists_rwsem); - - mutex_unlock(&device_mutex); - - return 0; -} -EXPORT_SYMBOL(ib_register_client); - -/** - * ib_unregister_client - Unregister an IB client - * @client:Client to unregister - * - * Upper level users use ib_unregister_client() to remove their client - * registration. When ib_unregister_client() is called, the client - * will receive a remove callback for each IB device still registered. - */ -void ib_unregister_client(struct ib_client *client) -{ - struct ib_client_data *context, *tmp; - struct ib_device *device; - unsigned long flags; - - mutex_lock(&device_mutex); - - down_write(&lists_rwsem); - list_del(&client->list); - up_write(&lists_rwsem); - - list_for_each_entry(device, &device_list, core_list) { - struct ib_client_data *found_context = NULL; - - down_write(&lists_rwsem); - spin_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry_safe(context, tmp, &device->client_data_list, list) - if (context->client == client) { - context->going_down = true; - found_context = context; - break; - } - spin_unlock_irqrestore(&device->client_data_lock, flags); - up_write(&lists_rwsem); - - if (client->remove) - client->remove(device, found_context ? - found_context->data : NULL); - - if (!found_context) { - pr_warn("No client context found for %s/%s\n", - device->name, client->name); - continue; - } - - down_write(&lists_rwsem); - spin_lock_irqsave(&device->client_data_lock, flags); - list_del(&found_context->list); - kfree(found_context); - spin_unlock_irqrestore(&device->client_data_lock, flags); - up_write(&lists_rwsem); - } - - mutex_unlock(&device_mutex); -} -EXPORT_SYMBOL(ib_unregister_client); - -/** - * ib_get_client_data - Get IB client context - * @device:Device to get context for - * @client:Client to get context for - * - * ib_get_client_data() returns client context set with - * ib_set_client_data(). - */ -void *ib_get_client_data(struct ib_device *device, struct ib_client *client) -{ - struct ib_client_data *context; - void *ret = NULL; - unsigned long flags; - - spin_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry(context, &device->client_data_list, list) - if (context->client == client) { - ret = context->data; - break; - } - spin_unlock_irqrestore(&device->client_data_lock, flags); - - return ret; -} -EXPORT_SYMBOL(ib_get_client_data); - -/** - * ib_set_client_data - Set IB client context - * @device:Device to set context for - * @client:Client to set context for - * @data:Context to set - * - * ib_set_client_data() sets client context that can be retrieved with - * ib_get_client_data(). 
- */ -void ib_set_client_data(struct ib_device *device, struct ib_client *client, - void *data) -{ - struct ib_client_data *context; - unsigned long flags; - - spin_lock_irqsave(&device->client_data_lock, flags); - list_for_each_entry(context, &device->client_data_list, list) - if (context->client == client) { - context->data = data; - goto out; - } - - pr_warn("No client context found for %s/%s\n", - device->name, client->name); - -out: - spin_unlock_irqrestore(&device->client_data_lock, flags); -} -EXPORT_SYMBOL(ib_set_client_data); - -/** - * ib_register_event_handler - Register an IB event handler - * @event_handler:Handler to register - * - * ib_register_event_handler() registers an event handler that will be - * called back when asynchronous IB events occur (as defined in - * chapter 11 of the InfiniBand Architecture Specification). This - * callback may occur in interrupt context. - */ -int ib_register_event_handler (struct ib_event_handler *event_handler) -{ - unsigned long flags; - - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); - list_add_tail(&event_handler->list, - &event_handler->device->event_handler_list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); - - return 0; -} -EXPORT_SYMBOL(ib_register_event_handler); - -/** - * ib_unregister_event_handler - Unregister an event handler - * @event_handler:Handler to unregister - * - * Unregister an event handler registered with - * ib_register_event_handler(). - */ -int ib_unregister_event_handler(struct ib_event_handler *event_handler) -{ - unsigned long flags; - - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); - list_del(&event_handler->list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); - - return 0; -} -EXPORT_SYMBOL(ib_unregister_event_handler); - -/** - * ib_dispatch_event - Dispatch an asynchronous event - * @event:Event to dispatch - * - * Low-level drivers must call ib_dispatch_event() to dispatch the - * event to all registered event handlers when an asynchronous event - * occurs. - */ -void ib_dispatch_event(struct ib_event *event) -{ - unsigned long flags; - struct ib_event_handler *handler; - - spin_lock_irqsave(&event->device->event_handler_lock, flags); - - list_for_each_entry(handler, &event->device->event_handler_list, list) - handler->handler(handler, event); - - spin_unlock_irqrestore(&event->device->event_handler_lock, flags); -} -EXPORT_SYMBOL(ib_dispatch_event); - -/** - * ib_query_port - Query IB port attributes - * @device:Device to query - * @port_num:Port number to query - * @port_attr:Port attributes - * - * ib_query_port() returns the attributes of a port through the - * @port_attr pointer. 
- */ -int ib_query_port(struct ib_device *device, - u8 port_num, - struct ib_port_attr *port_attr) -{ - union ib_gid gid; - int err; - - if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) - return -EINVAL; - - memset(port_attr, 0, sizeof(*port_attr)); - err = device->query_port(device, port_num, port_attr); - if (err || port_attr->subnet_prefix) - return err; - - if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND) - return 0; - - err = ib_query_gid(device, port_num, 0, &gid, NULL); - if (err) - return err; - - port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix); - return 0; -} -EXPORT_SYMBOL(ib_query_port); - -/** - * ib_query_gid - Get GID table entry - * @device:Device to query - * @port_num:Port number to query - * @index:GID table index to query - * @gid:Returned GID - * @attr: Returned GID attributes related to this GID index (only in RoCE). - * NULL means ignore. - * - * ib_query_gid() fetches the specified GID table entry. - */ -int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid, - struct ib_gid_attr *attr) -{ - if (rdma_cap_roce_gid_table(device, port_num)) - return ib_get_cached_gid(device, port_num, index, gid, attr); - - if (attr) - return -EINVAL; - - return device->query_gid(device, port_num, index, gid); -} -EXPORT_SYMBOL(ib_query_gid); - -/** - * ib_enum_roce_netdev - enumerate all RoCE ports - * @ib_dev : IB device we want to query - * @filter: Should we call the callback? - * @filter_cookie: Cookie passed to filter - * @cb: Callback to call for each found RoCE port - * @cookie: Cookie passed back to the callback - * - * Enumerates all of the physical RoCE ports of ib_dev - * which are related to a netdevice and calls callback() on each - * device for which the filter() function returns non-zero. - */ -void ib_enum_roce_netdev(struct ib_device *ib_dev, - roce_netdev_filter filter, - void *filter_cookie, - roce_netdev_callback cb, - void *cookie) -{ - u8 port; - - for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev); - port++) - if (rdma_protocol_roce(ib_dev, port)) { - struct net_device *idev = NULL; - - if (ib_dev->get_netdev) - idev = ib_dev->get_netdev(ib_dev, port); - - if (idev && (idev->if_flags & IFF_DYING)) { - dev_put(idev); - idev = NULL; - } - - if (filter(ib_dev, port, idev, filter_cookie)) - cb(ib_dev, port, idev, cookie); - - if (idev) - dev_put(idev); - } -} - -/** - * ib_enum_all_roce_netdevs - enumerate all RoCE devices - * @filter: Should we call the callback? - * @filter_cookie: Cookie passed to filter - * @cb: Callback to call for each found RoCE port - * @cookie: Cookie passed back to the callback - * - * Enumerates all RoCE devices' physical ports which are related - * to netdevices and calls callback() on each device for which the - * filter() function returns non-zero. - */ -void ib_enum_all_roce_netdevs(roce_netdev_filter filter, - void *filter_cookie, - roce_netdev_callback cb, - void *cookie) -{ - struct ib_device *dev; - - down_read(&lists_rwsem); - list_for_each_entry(dev, &device_list, core_list) - ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie); - up_read(&lists_rwsem); -} - -/** - * ib_query_pkey - Get P_Key table entry - * @device:Device to query - * @port_num:Port number to query - * @index:P_Key table index to query - * @pkey:Returned P_Key - * - * ib_query_pkey() fetches the specified P_Key table entry. 
- */ -int ib_query_pkey(struct ib_device *device, - u8 port_num, u16 index, u16 *pkey) -{ - return device->query_pkey(device, port_num, index, pkey); -} -EXPORT_SYMBOL(ib_query_pkey); - -/** - * ib_modify_device - Change IB device attributes - * @device:Device to modify - * @device_modify_mask:Mask of attributes to change - * @device_modify:New attribute values - * - * ib_modify_device() changes a device's attributes as specified by - * the @device_modify_mask and @device_modify structure. - */ -int ib_modify_device(struct ib_device *device, - int device_modify_mask, - struct ib_device_modify *device_modify) -{ - if (!device->modify_device) - return -ENOSYS; - - return device->modify_device(device, device_modify_mask, - device_modify); -} -EXPORT_SYMBOL(ib_modify_device); - -/** - * ib_modify_port - Modifies the attributes for the specified port. - * @device: The device to modify. - * @port_num: The number of the port to modify. - * @port_modify_mask: Mask used to specify which attributes of the port - * to change. - * @port_modify: New attribute values for the port. - * - * ib_modify_port() changes a port's attributes as specified by the - * @port_modify_mask and @port_modify structure. - */ -int ib_modify_port(struct ib_device *device, - u8 port_num, int port_modify_mask, - struct ib_port_modify *port_modify) -{ - if (!device->modify_port) - return -ENOSYS; - - if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) - return -EINVAL; - - return device->modify_port(device, port_num, port_modify_mask, - port_modify); -} -EXPORT_SYMBOL(ib_modify_port); - -/** - * ib_find_gid - Returns the port number and GID table index where - * a specified GID value occurs. - * @device: The device to query. - * @gid: The GID value to search for. - * @gid_type: Type of GID. - * @ndev: The ndev related to the GID to search for. - * @port_num: The port number of the device where the GID value was found. - * @index: The index into the GID table where the GID was found. This - * parameter may be NULL. - */ -int ib_find_gid(struct ib_device *device, union ib_gid *gid, - enum ib_gid_type gid_type, struct net_device *ndev, - u8 *port_num, u16 *index) -{ - union ib_gid tmp_gid; - int ret, port, i; - - for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { - if (rdma_cap_roce_gid_table(device, port)) { - if (!ib_find_cached_gid_by_port(device, gid, gid_type, port, - ndev, index)) { - *port_num = port; - return 0; - } - } - - if (gid_type != IB_GID_TYPE_IB) - continue; - - for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { - ret = ib_query_gid(device, port, i, &tmp_gid, NULL); - if (ret) - return ret; - if (!memcmp(&tmp_gid, gid, sizeof *gid)) { - *port_num = port; - if (index) - *index = i; - return 0; - } - } - } - - return -ENOENT; -} -EXPORT_SYMBOL(ib_find_gid); - -/** - * ib_find_pkey - Returns the PKey table index where a specified - * PKey value occurs. - * @device: The device to query. - * @port_num: The port number of the device to search for the PKey. - * @pkey: The PKey value to search for. - * @index: The index into the PKey table where the PKey was found. 
- */ -int ib_find_pkey(struct ib_device *device, - u8 port_num, u16 pkey, u16 *index) -{ - int ret, i; - u16 tmp_pkey; - int partial_ix = -1; - - for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) { - ret = ib_query_pkey(device, port_num, i, &tmp_pkey); - if (ret) - return ret; - if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) { - /* if there is a full-member pkey, take it */ - if (tmp_pkey & 0x8000) { - *index = i; - return 0; - } - if (partial_ix < 0) - partial_ix = i; - } - } - - /* no full member; if a limited member exists, take it */ - if (partial_ix >= 0) { - *index = partial_ix; - return 0; - } - return -ENOENT; -} -EXPORT_SYMBOL(ib_find_pkey); - -/** - * ib_get_net_dev_by_params() - Return the appropriate net_dev - * for a received CM request - * @dev: An RDMA device on which the request has been received. - * @port: Port number on the RDMA device. - * @pkey: The Pkey the request came on. - * @gid: A GID that the net_dev uses to communicate. - * @addr: Contains the IP address that the request specified as its - * destination. - */ -struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, - u8 port, - u16 pkey, - const union ib_gid *gid, - const struct sockaddr *addr) -{ - struct net_device *net_dev = NULL; - struct ib_client_data *context; - - if (!rdma_protocol_ib(dev, port)) - return NULL; - - down_read(&lists_rwsem); - - list_for_each_entry(context, &dev->client_data_list, list) { - struct ib_client *client = context->client; - - if (context->going_down) - continue; - - if (client->get_net_dev_by_params) { - net_dev = client->get_net_dev_by_params(dev, port, pkey, - gid, addr, - context->data); - if (net_dev) - break; - } - } - - up_read(&lists_rwsem); - - return net_dev; -} -EXPORT_SYMBOL(ib_get_net_dev_by_params); - -static int __init ib_core_init(void) -{ - int ret; - - ib_wq = alloc_workqueue("infiniband", 0, 0); - if (!ib_wq) - return -ENOMEM; - - ib_comp_wq = alloc_workqueue("ib-comp-wq", - WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, - mp_ncpus * 4 /* WQ_UNBOUND_MAX_ACTIVE */); - if (!ib_comp_wq) { - ret = -ENOMEM; - goto err; - } - - ret = class_register(&ib_class); - if (ret) { - pr_warn("Couldn't create InfiniBand device class\n"); - goto err_comp; - } - - ret = addr_init(); - if (ret) { - pr_warn("Couldn't init IB address resolution\n"); - goto err_sysfs; - } - - ret = ib_mad_init(); - if (ret) { - pr_warn("Couldn't init IB MAD\n"); - goto err_addr; - } - - ret = ib_sa_init(); - if (ret) { - pr_warn("Couldn't init SA\n"); - goto err_mad; - } - - ib_cache_setup(); - - return 0; - -err_mad: - ib_mad_cleanup(); -err_addr: - addr_cleanup(); -err_sysfs: - class_unregister(&ib_class); -err_comp: - destroy_workqueue(ib_comp_wq); -err: - destroy_workqueue(ib_wq); - return ret; -} - -static void __exit ib_core_cleanup(void) -{ - ib_cache_cleanup(); - ib_sa_cleanup(); - ib_mad_cleanup(); - addr_cleanup(); - class_unregister(&ib_class); - destroy_workqueue(ib_comp_wq); - /* Make sure that any pending umem accounting work is done. 
*/ - destroy_workqueue(ib_wq); -} - -module_init(ib_core_init); -module_exit(ib_core_cleanup); - -MODULE_VERSION(ibcore, 1); -MODULE_DEPEND(ibcore, linuxkpi, 1, 1, 1); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/device.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/mad_rmpp.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/mad_rmpp.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/mad_rmpp.c (nonexistent) @@ -1,968 +0,0 @@ -/* - * Copyright (c) 2005 Intel Inc. All rights reserved. - * Copyright (c) 2005-2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2014 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include - -#include "mad_priv.h" -#include "mad_rmpp.h" - -enum rmpp_state { - RMPP_STATE_ACTIVE, - RMPP_STATE_TIMEOUT, - RMPP_STATE_COMPLETE, - RMPP_STATE_CANCELING -}; - -struct mad_rmpp_recv { - struct ib_mad_agent_private *agent; - struct list_head list; - struct delayed_work timeout_work; - struct delayed_work cleanup_work; - struct completion comp; - enum rmpp_state state; - spinlock_t lock; - atomic_t refcount; - - struct ib_ah *ah; - struct ib_mad_recv_wc *rmpp_wc; - struct ib_mad_recv_buf *cur_seg_buf; - int last_ack; - int seg_num; - int newwin; - int repwin; - - __be64 tid; - u32 src_qp; - u16 slid; - u8 mgmt_class; - u8 class_version; - u8 method; - u8 base_version; -}; - -static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) -{ - if (atomic_dec_and_test(&rmpp_recv->refcount)) - complete(&rmpp_recv->comp); -} - -static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) -{ - deref_rmpp_recv(rmpp_recv); - wait_for_completion(&rmpp_recv->comp); - ib_destroy_ah(rmpp_recv->ah); - kfree(rmpp_recv); -} - -void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) -{ - struct mad_rmpp_recv *rmpp_recv, *temp_rmpp_recv; - unsigned long flags; - - spin_lock_irqsave(&agent->lock, flags); - list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { - if (rmpp_recv->state != RMPP_STATE_COMPLETE) - ib_free_recv_mad(rmpp_recv->rmpp_wc); - rmpp_recv->state = RMPP_STATE_CANCELING; - } - spin_unlock_irqrestore(&agent->lock, flags); - - list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { - cancel_delayed_work(&rmpp_recv->timeout_work); - cancel_delayed_work(&rmpp_recv->cleanup_work); - } - - flush_workqueue(agent->qp_info->port_priv->wq); - - list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv, - &agent->rmpp_list, list) { - list_del(&rmpp_recv->list); - destroy_rmpp_recv(rmpp_recv); - } -} - -static void format_ack(struct ib_mad_send_buf *msg, - struct ib_rmpp_mad *data, - struct mad_rmpp_recv *rmpp_recv) -{ - struct ib_rmpp_mad *ack = msg->mad; - unsigned long flags; - - memcpy(ack, &data->mad_hdr, msg->hdr_len); - - ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP; - ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK; - ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); - - spin_lock_irqsave(&rmpp_recv->lock, flags); - rmpp_recv->last_ack = rmpp_recv->seg_num; - ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num); - ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin); - spin_unlock_irqrestore(&rmpp_recv->lock, flags); -} - -static void ack_recv(struct mad_rmpp_recv *rmpp_recv, - struct ib_mad_recv_wc *recv_wc) -{ - struct ib_mad_send_buf *msg; - int ret, hdr_len; - - hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); - msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, 1, hdr_len, - 0, GFP_KERNEL, - IB_MGMT_BASE_VERSION); - if (IS_ERR(msg)) - return; - - format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); - msg->ah = rmpp_recv->ah; - ret = ib_post_send_mad(msg, NULL); - if (ret) - ib_free_send_mad(msg); -} - -static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, - struct ib_mad_recv_wc *recv_wc) -{ - struct ib_mad_send_buf *msg; - struct ib_ah *ah; - int hdr_len; - - ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, - recv_wc->recv_buf.grh, agent->port_num); - if (IS_ERR(ah)) - return (void *) ah; - - hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); - msg = ib_create_send_mad(agent, 
recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, 1, - hdr_len, 0, GFP_KERNEL, - IB_MGMT_BASE_VERSION); - if (IS_ERR(msg)) - ib_destroy_ah(ah); - else { - msg->ah = ah; - msg->context[0] = ah; - } - - return msg; -} - -static void ack_ds_ack(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *recv_wc) -{ - struct ib_mad_send_buf *msg; - struct ib_rmpp_mad *rmpp_mad; - int ret; - - msg = alloc_response_msg(&agent->agent, recv_wc); - if (IS_ERR(msg)) - return; - - rmpp_mad = msg->mad; - memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len); - - rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; - ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); - rmpp_mad->rmpp_hdr.seg_num = 0; - rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1); - - ret = ib_post_send_mad(msg, NULL); - if (ret) { - ib_destroy_ah(msg->ah); - ib_free_send_mad(msg); - } -} - -void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) -{ - if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah) - ib_destroy_ah(mad_send_wc->send_buf->ah); - ib_free_send_mad(mad_send_wc->send_buf); -} - -static void nack_recv(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *recv_wc, u8 rmpp_status) -{ - struct ib_mad_send_buf *msg; - struct ib_rmpp_mad *rmpp_mad; - int ret; - - msg = alloc_response_msg(&agent->agent, recv_wc); - if (IS_ERR(msg)) - return; - - rmpp_mad = msg->mad; - memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len); - - rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; - rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION; - rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ABORT; - ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); - rmpp_mad->rmpp_hdr.rmpp_status = rmpp_status; - rmpp_mad->rmpp_hdr.seg_num = 0; - rmpp_mad->rmpp_hdr.paylen_newwin = 0; - - ret = ib_post_send_mad(msg, NULL); - if (ret) { - ib_destroy_ah(msg->ah); - ib_free_send_mad(msg); - } -} - -static void recv_timeout_handler(struct work_struct *work) -{ - struct mad_rmpp_recv *rmpp_recv = - container_of(work, struct mad_rmpp_recv, timeout_work.work); - struct ib_mad_recv_wc *rmpp_wc; - unsigned long flags; - - spin_lock_irqsave(&rmpp_recv->agent->lock, flags); - if (rmpp_recv->state != RMPP_STATE_ACTIVE) { - spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); - return; - } - rmpp_recv->state = RMPP_STATE_TIMEOUT; - list_del(&rmpp_recv->list); - spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); - - rmpp_wc = rmpp_recv->rmpp_wc; - nack_recv(rmpp_recv->agent, rmpp_wc, IB_MGMT_RMPP_STATUS_T2L); - destroy_rmpp_recv(rmpp_recv); - ib_free_recv_mad(rmpp_wc); -} - -static void recv_cleanup_handler(struct work_struct *work) -{ - struct mad_rmpp_recv *rmpp_recv = - container_of(work, struct mad_rmpp_recv, cleanup_work.work); - unsigned long flags; - - spin_lock_irqsave(&rmpp_recv->agent->lock, flags); - if (rmpp_recv->state == RMPP_STATE_CANCELING) { - spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); - return; - } - list_del(&rmpp_recv->list); - spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); - destroy_rmpp_recv(rmpp_recv); -} - -static struct mad_rmpp_recv * -create_rmpp_recv(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct mad_rmpp_recv *rmpp_recv; - struct ib_mad_hdr *mad_hdr; - - rmpp_recv = kmalloc(sizeof *rmpp_recv, GFP_KERNEL); - if (!rmpp_recv) - return NULL; - - rmpp_recv->ah = ib_create_ah_from_wc(agent->agent.qp->pd, - mad_recv_wc->wc, - mad_recv_wc->recv_buf.grh, - agent->agent.port_num); - if (IS_ERR(rmpp_recv->ah)) - goto error; 
- - rmpp_recv->agent = agent; - init_completion(&rmpp_recv->comp); - INIT_DELAYED_WORK(&rmpp_recv->timeout_work, recv_timeout_handler); - INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler); - spin_lock_init(&rmpp_recv->lock); - rmpp_recv->state = RMPP_STATE_ACTIVE; - atomic_set(&rmpp_recv->refcount, 1); - - rmpp_recv->rmpp_wc = mad_recv_wc; - rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf; - rmpp_recv->newwin = 1; - rmpp_recv->seg_num = 1; - rmpp_recv->last_ack = 0; - rmpp_recv->repwin = 1; - - mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; - rmpp_recv->tid = mad_hdr->tid; - rmpp_recv->src_qp = mad_recv_wc->wc->src_qp; - rmpp_recv->slid = mad_recv_wc->wc->slid; - rmpp_recv->mgmt_class = mad_hdr->mgmt_class; - rmpp_recv->class_version = mad_hdr->class_version; - rmpp_recv->method = mad_hdr->method; - rmpp_recv->base_version = mad_hdr->base_version; - return rmpp_recv; - -error: kfree(rmpp_recv); - return NULL; -} - -static struct mad_rmpp_recv * -find_rmpp_recv(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct mad_rmpp_recv *rmpp_recv; - struct ib_mad_hdr *mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; - - list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { - if (rmpp_recv->tid == mad_hdr->tid && - rmpp_recv->src_qp == mad_recv_wc->wc->src_qp && - rmpp_recv->slid == mad_recv_wc->wc->slid && - rmpp_recv->mgmt_class == mad_hdr->mgmt_class && - rmpp_recv->class_version == mad_hdr->class_version && - rmpp_recv->method == mad_hdr->method) - return rmpp_recv; - } - return NULL; -} - -static struct mad_rmpp_recv * -acquire_rmpp_recv(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct mad_rmpp_recv *rmpp_recv; - unsigned long flags; - - spin_lock_irqsave(&agent->lock, flags); - rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); - if (rmpp_recv) - atomic_inc(&rmpp_recv->refcount); - spin_unlock_irqrestore(&agent->lock, flags); - return rmpp_recv; -} - -static struct mad_rmpp_recv * -insert_rmpp_recv(struct ib_mad_agent_private *agent, - struct mad_rmpp_recv *rmpp_recv) -{ - struct mad_rmpp_recv *cur_rmpp_recv; - - cur_rmpp_recv = find_rmpp_recv(agent, rmpp_recv->rmpp_wc); - if (!cur_rmpp_recv) - list_add_tail(&rmpp_recv->list, &agent->rmpp_list); - - return cur_rmpp_recv; -} - -static inline int get_last_flag(struct ib_mad_recv_buf *seg) -{ - struct ib_rmpp_mad *rmpp_mad; - - rmpp_mad = (struct ib_rmpp_mad *) seg->mad; - return ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_LAST; -} - -static inline int get_seg_num(struct ib_mad_recv_buf *seg) -{ - struct ib_rmpp_mad *rmpp_mad; - - rmpp_mad = (struct ib_rmpp_mad *) seg->mad; - return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num); -} - -static inline struct ib_mad_recv_buf * get_next_seg(struct list_head *rmpp_list, - struct ib_mad_recv_buf *seg) -{ - if (seg->list.next == rmpp_list) - return NULL; - - return container_of(seg->list.next, struct ib_mad_recv_buf, list); -} - -static inline int window_size(struct ib_mad_agent_private *agent) -{ - return max(agent->qp_info->recv_queue.max_active >> 3, 1); -} - -static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list, - int seg_num) -{ - struct ib_mad_recv_buf *seg_buf; - int cur_seg_num; - - list_for_each_entry_reverse(seg_buf, rmpp_list, list) { - cur_seg_num = get_seg_num(seg_buf); - if (seg_num > cur_seg_num) - return seg_buf; - if (seg_num == cur_seg_num) - break; - } - return NULL; -} - -static void update_seg_num(struct mad_rmpp_recv *rmpp_recv, - struct ib_mad_recv_buf *new_buf) -{ 
- struct list_head *rmpp_list = &rmpp_recv->rmpp_wc->rmpp_list; - - while (new_buf && (get_seg_num(new_buf) == rmpp_recv->seg_num + 1)) { - rmpp_recv->cur_seg_buf = new_buf; - rmpp_recv->seg_num++; - new_buf = get_next_seg(rmpp_list, new_buf); - } -} - -static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv) -{ - struct ib_rmpp_mad *rmpp_mad; - int hdr_size, data_size, pad; - bool opa = rdma_cap_opa_mad(rmpp_recv->agent->qp_info->port_priv->device, - rmpp_recv->agent->qp_info->port_priv->port_num); - - rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad; - - hdr_size = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); - if (opa && rmpp_recv->base_version == OPA_MGMT_BASE_VERSION) { - data_size = sizeof(struct opa_rmpp_mad) - hdr_size; - pad = OPA_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); - if (pad > OPA_MGMT_RMPP_DATA || pad < 0) - pad = 0; - } else { - data_size = sizeof(struct ib_rmpp_mad) - hdr_size; - pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); - if (pad > IB_MGMT_RMPP_DATA || pad < 0) - pad = 0; - } - - return hdr_size + rmpp_recv->seg_num * data_size - pad; -} - -static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv) -{ - struct ib_mad_recv_wc *rmpp_wc; - - ack_recv(rmpp_recv, rmpp_recv->rmpp_wc); - if (rmpp_recv->seg_num > 1) - cancel_delayed_work(&rmpp_recv->timeout_work); - - rmpp_wc = rmpp_recv->rmpp_wc; - rmpp_wc->mad_len = get_mad_len(rmpp_recv); - /* 10 seconds until we can find the packet lifetime */ - queue_delayed_work(rmpp_recv->agent->qp_info->port_priv->wq, - &rmpp_recv->cleanup_work, msecs_to_jiffies(10000)); - return rmpp_wc; -} - -static struct ib_mad_recv_wc * -continue_rmpp(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct mad_rmpp_recv *rmpp_recv; - struct ib_mad_recv_buf *prev_buf; - struct ib_mad_recv_wc *done_wc; - int seg_num; - unsigned long flags; - - rmpp_recv = acquire_rmpp_recv(agent, mad_recv_wc); - if (!rmpp_recv) - goto drop1; - - seg_num = get_seg_num(&mad_recv_wc->recv_buf); - - spin_lock_irqsave(&rmpp_recv->lock, flags); - if ((rmpp_recv->state == RMPP_STATE_TIMEOUT) || - (seg_num > rmpp_recv->newwin)) - goto drop3; - - if ((seg_num <= rmpp_recv->last_ack) || - (rmpp_recv->state == RMPP_STATE_COMPLETE)) { - spin_unlock_irqrestore(&rmpp_recv->lock, flags); - ack_recv(rmpp_recv, mad_recv_wc); - goto drop2; - } - - prev_buf = find_seg_location(&rmpp_recv->rmpp_wc->rmpp_list, seg_num); - if (!prev_buf) - goto drop3; - - done_wc = NULL; - list_add(&mad_recv_wc->recv_buf.list, &prev_buf->list); - if (rmpp_recv->cur_seg_buf == prev_buf) { - update_seg_num(rmpp_recv, &mad_recv_wc->recv_buf); - if (get_last_flag(rmpp_recv->cur_seg_buf)) { - rmpp_recv->state = RMPP_STATE_COMPLETE; - spin_unlock_irqrestore(&rmpp_recv->lock, flags); - done_wc = complete_rmpp(rmpp_recv); - goto out; - } else if (rmpp_recv->seg_num == rmpp_recv->newwin) { - rmpp_recv->newwin += window_size(agent); - spin_unlock_irqrestore(&rmpp_recv->lock, flags); - ack_recv(rmpp_recv, mad_recv_wc); - goto out; - } - } - spin_unlock_irqrestore(&rmpp_recv->lock, flags); -out: - deref_rmpp_recv(rmpp_recv); - return done_wc; - -drop3: spin_unlock_irqrestore(&rmpp_recv->lock, flags); -drop2: deref_rmpp_recv(rmpp_recv); -drop1: ib_free_recv_mad(mad_recv_wc); - return NULL; -} - -static struct ib_mad_recv_wc * -start_rmpp(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct mad_rmpp_recv *rmpp_recv; - unsigned long flags; - - rmpp_recv = 
create_rmpp_recv(agent, mad_recv_wc); - if (!rmpp_recv) { - ib_free_recv_mad(mad_recv_wc); - return NULL; - } - - spin_lock_irqsave(&agent->lock, flags); - if (insert_rmpp_recv(agent, rmpp_recv)) { - spin_unlock_irqrestore(&agent->lock, flags); - /* duplicate first MAD */ - destroy_rmpp_recv(rmpp_recv); - return continue_rmpp(agent, mad_recv_wc); - } - atomic_inc(&rmpp_recv->refcount); - - if (get_last_flag(&mad_recv_wc->recv_buf)) { - rmpp_recv->state = RMPP_STATE_COMPLETE; - spin_unlock_irqrestore(&agent->lock, flags); - complete_rmpp(rmpp_recv); - } else { - spin_unlock_irqrestore(&agent->lock, flags); - /* 40 seconds until we can find the packet lifetimes */ - queue_delayed_work(agent->qp_info->port_priv->wq, - &rmpp_recv->timeout_work, - msecs_to_jiffies(40000)); - rmpp_recv->newwin += window_size(agent); - ack_recv(rmpp_recv, mad_recv_wc); - mad_recv_wc = NULL; - } - deref_rmpp_recv(rmpp_recv); - return mad_recv_wc; -} - -static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) -{ - struct ib_rmpp_mad *rmpp_mad; - int timeout; - u32 paylen = 0; - - rmpp_mad = mad_send_wr->send_buf.mad; - ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); - rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(++mad_send_wr->seg_num); - - if (mad_send_wr->seg_num == 1) { - rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; - paylen = (mad_send_wr->send_buf.seg_count * - mad_send_wr->send_buf.seg_rmpp_size) - - mad_send_wr->pad; - } - - if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) { - rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; - paylen = mad_send_wr->send_buf.seg_rmpp_size - mad_send_wr->pad; - } - rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); - - /* 2 seconds for an ACK until we can find the packet lifetime */ - timeout = mad_send_wr->send_buf.timeout_ms; - if (!timeout || timeout > 2000) - mad_send_wr->timeout = msecs_to_jiffies(2000); - - return ib_send_mad(mad_send_wr); -} - -static void abort_send(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *mad_recv_wc, u8 rmpp_status) -{ - struct ib_mad_send_wr_private *mad_send_wr; - struct ib_mad_send_wc wc; - unsigned long flags; - - spin_lock_irqsave(&agent->lock, flags); - mad_send_wr = ib_find_send_mad(agent, mad_recv_wc); - if (!mad_send_wr) - goto out; /* Unmatched send */ - - if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || - (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) - goto out; /* Send is already done */ - - ib_mark_mad_done(mad_send_wr); - spin_unlock_irqrestore(&agent->lock, flags); - - wc.status = IB_WC_REM_ABORT_ERR; - wc.vendor_err = rmpp_status; - wc.send_buf = &mad_send_wr->send_buf; - ib_mad_complete_send_wr(mad_send_wr, &wc); - return; -out: - spin_unlock_irqrestore(&agent->lock, flags); -} - -static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr, - int seg_num) -{ - struct list_head *list; - - wr->last_ack = seg_num; - list = &wr->last_ack_seg->list; - list_for_each_entry(wr->last_ack_seg, list, list) - if (wr->last_ack_seg->num == seg_num) - break; -} - -static void process_ds_ack(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *mad_recv_wc, int newwin) -{ - struct mad_rmpp_recv *rmpp_recv; - - rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); - if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE) - rmpp_recv->repwin = newwin; -} - -static void process_rmpp_ack(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_mad_send_wr_private *mad_send_wr; - 
struct ib_rmpp_mad *rmpp_mad; - unsigned long flags; - int seg_num, newwin, ret; - - rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; - if (rmpp_mad->rmpp_hdr.rmpp_status) { - abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); - nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); - return; - } - - seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num); - newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); - if (newwin < seg_num) { - abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S); - nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S); - return; - } - - spin_lock_irqsave(&agent->lock, flags); - mad_send_wr = ib_find_send_mad(agent, mad_recv_wc); - if (!mad_send_wr) { - if (!seg_num) - process_ds_ack(agent, mad_recv_wc, newwin); - goto out; /* Unmatched or DS RMPP ACK */ - } - - if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) && - (mad_send_wr->timeout)) { - spin_unlock_irqrestore(&agent->lock, flags); - ack_ds_ack(agent, mad_recv_wc); - return; /* Repeated ACK for DS RMPP transaction */ - } - - if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || - (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) - goto out; /* Send is already done */ - - if (seg_num > mad_send_wr->send_buf.seg_count || - seg_num > mad_send_wr->newwin) { - spin_unlock_irqrestore(&agent->lock, flags); - abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B); - nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B); - return; - } - - if (newwin < mad_send_wr->newwin || seg_num < mad_send_wr->last_ack) - goto out; /* Old ACK */ - - if (seg_num > mad_send_wr->last_ack) { - adjust_last_ack(mad_send_wr, seg_num); - mad_send_wr->retries_left = mad_send_wr->max_retries; - } - mad_send_wr->newwin = newwin; - if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { - /* If no response is expected, the ACK completes the send */ - if (!mad_send_wr->send_buf.timeout_ms) { - struct ib_mad_send_wc wc; - - ib_mark_mad_done(mad_send_wr); - spin_unlock_irqrestore(&agent->lock, flags); - - wc.status = IB_WC_SUCCESS; - wc.vendor_err = 0; - wc.send_buf = &mad_send_wr->send_buf; - ib_mad_complete_send_wr(mad_send_wr, &wc); - return; - } - if (mad_send_wr->refcount == 1) - ib_reset_mad_timeout(mad_send_wr, - mad_send_wr->send_buf.timeout_ms); - spin_unlock_irqrestore(&agent->lock, flags); - ack_ds_ack(agent, mad_recv_wc); - return; - } else if (mad_send_wr->refcount == 1 && - mad_send_wr->seg_num < mad_send_wr->newwin && - mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) { - /* Send failure will just result in a timeout/retry */ - ret = send_next_seg(mad_send_wr); - if (ret) - goto out; - - mad_send_wr->refcount++; - list_move_tail(&mad_send_wr->agent_list, - &mad_send_wr->mad_agent_priv->send_list); - } -out: - spin_unlock_irqrestore(&agent->lock, flags); -} - -static struct ib_mad_recv_wc * -process_rmpp_data(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_rmpp_hdr *rmpp_hdr; - u8 rmpp_status; - - rmpp_hdr = &((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr; - - if (rmpp_hdr->rmpp_status) { - rmpp_status = IB_MGMT_RMPP_STATUS_BAD_STATUS; - goto bad; - } - - if (rmpp_hdr->seg_num == cpu_to_be32(1)) { - if (!(ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST)) { - rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG; - goto bad; - } - return start_rmpp(agent, mad_recv_wc); - } else { - if (ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST) { - rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG; - goto 
bad; - } - return continue_rmpp(agent, mad_recv_wc); - } -bad: - nack_recv(agent, mad_recv_wc, rmpp_status); - ib_free_recv_mad(mad_recv_wc); - return NULL; -} - -static void process_rmpp_stop(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_rmpp_mad *rmpp_mad; - - rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; - - if (rmpp_mad->rmpp_hdr.rmpp_status != IB_MGMT_RMPP_STATUS_RESX) { - abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); - nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); - } else - abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status); -} - -static void process_rmpp_abort(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_rmpp_mad *rmpp_mad; - - rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; - - if (rmpp_mad->rmpp_hdr.rmpp_status < IB_MGMT_RMPP_STATUS_ABORT_MIN || - rmpp_mad->rmpp_hdr.rmpp_status > IB_MGMT_RMPP_STATUS_ABORT_MAX) { - abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); - nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); - } else - abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status); -} - -struct ib_mad_recv_wc * -ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_rmpp_mad *rmpp_mad; - - rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; - if (!(rmpp_mad->rmpp_hdr.rmpp_rtime_flags & IB_MGMT_RMPP_FLAG_ACTIVE)) - return mad_recv_wc; - - if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) { - abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV); - nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV); - goto out; - } - - switch (rmpp_mad->rmpp_hdr.rmpp_type) { - case IB_MGMT_RMPP_TYPE_DATA: - return process_rmpp_data(agent, mad_recv_wc); - case IB_MGMT_RMPP_TYPE_ACK: - process_rmpp_ack(agent, mad_recv_wc); - break; - case IB_MGMT_RMPP_TYPE_STOP: - process_rmpp_stop(agent, mad_recv_wc); - break; - case IB_MGMT_RMPP_TYPE_ABORT: - process_rmpp_abort(agent, mad_recv_wc); - break; - default: - abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT); - nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT); - break; - } -out: - ib_free_recv_mad(mad_recv_wc); - return NULL; -} - -static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr) -{ - struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv; - struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad; - struct mad_rmpp_recv *rmpp_recv; - struct ib_ah_attr ah_attr; - unsigned long flags; - int newwin = 1; - - if (!(mad_hdr->method & IB_MGMT_METHOD_RESP)) - goto out; - - spin_lock_irqsave(&agent->lock, flags); - list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { - if (rmpp_recv->tid != mad_hdr->tid || - rmpp_recv->mgmt_class != mad_hdr->mgmt_class || - rmpp_recv->class_version != mad_hdr->class_version || - (rmpp_recv->method & IB_MGMT_METHOD_RESP)) - continue; - - if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr)) - continue; - - if (rmpp_recv->slid == ah_attr.dlid) { - newwin = rmpp_recv->repwin; - break; - } - } - spin_unlock_irqrestore(&agent->lock, flags); -out: - return newwin; -} - -int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) -{ - struct ib_rmpp_mad *rmpp_mad; - int ret; - - rmpp_mad = mad_send_wr->send_buf.mad; - if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & - IB_MGMT_RMPP_FLAG_ACTIVE)) - return IB_RMPP_RESULT_UNHANDLED; - - if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { - mad_send_wr->seg_num = 
1; - return IB_RMPP_RESULT_INTERNAL; - } - - mad_send_wr->newwin = init_newwin(mad_send_wr); - - /* We need to wait for the final ACK even if there isn't a response */ - mad_send_wr->refcount += (mad_send_wr->timeout == 0); - ret = send_next_seg(mad_send_wr); - if (!ret) - return IB_RMPP_RESULT_CONSUMED; - return ret; -} - -int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, - struct ib_mad_send_wc *mad_send_wc) -{ - struct ib_rmpp_mad *rmpp_mad; - int ret; - - rmpp_mad = mad_send_wr->send_buf.mad; - if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & - IB_MGMT_RMPP_FLAG_ACTIVE)) - return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ - - if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) - return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */ - - if (mad_send_wc->status != IB_WC_SUCCESS || - mad_send_wr->status != IB_WC_SUCCESS) - return IB_RMPP_RESULT_PROCESSED; /* Canceled or send error */ - - if (!mad_send_wr->timeout) - return IB_RMPP_RESULT_PROCESSED; /* Response received */ - - if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { - mad_send_wr->timeout = - msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); - return IB_RMPP_RESULT_PROCESSED; /* Send done */ - } - - if (mad_send_wr->seg_num == mad_send_wr->newwin || - mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) - return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */ - - ret = send_next_seg(mad_send_wr); - if (ret) { - mad_send_wc->status = IB_WC_GENERAL_ERR; - return IB_RMPP_RESULT_PROCESSED; - } - return IB_RMPP_RESULT_CONSUMED; -} - -int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr) -{ - struct ib_rmpp_mad *rmpp_mad; - int ret; - - rmpp_mad = mad_send_wr->send_buf.mad; - if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & - IB_MGMT_RMPP_FLAG_ACTIVE)) - return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ - - if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) - return IB_RMPP_RESULT_PROCESSED; - - mad_send_wr->seg_num = mad_send_wr->last_ack; - mad_send_wr->cur_seg = mad_send_wr->last_ack_seg; - - ret = send_next_seg(mad_send_wr); - if (ret) - return IB_RMPP_RESULT_PROCESSED; - - return IB_RMPP_RESULT_CONSUMED; -} Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/mad_rmpp.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/packer.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/packer.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/packer.c (nonexistent) @@ -1,200 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include - -#include - -static u64 value_read(int offset, int size, void *structure) -{ - switch (size) { - case 1: return *(u8 *) ((char *)structure + offset); - case 2: return be16_to_cpup((__be16 *) ((char *)structure + offset)); - case 4: return be32_to_cpup((__be32 *) ((char *)structure + offset)); - case 8: return be64_to_cpup((__be64 *) ((char *)structure + offset)); - default: - pr_warn("Field size %d bits not handled\n", size * 8); - return 0; - } -} - -/** - * ib_pack - Pack a structure into a buffer - * @desc:Array of structure field descriptions - * @desc_len:Number of entries in @desc - * @structure:Structure to pack from - * @buf:Buffer to pack into - * - * ib_pack() packs a list of structure fields into a buffer, - * controlled by the array of fields in @desc. - */ -void ib_pack(const struct ib_field *desc, - int desc_len, - void *structure, - void *buf) -{ - int i; - - for (i = 0; i < desc_len; ++i) { - if (desc[i].size_bits <= 32) { - int shift; - u32 val; - __be32 mask; - __be32 *addr; - - shift = 32 - desc[i].offset_bits - desc[i].size_bits; - if (desc[i].struct_size_bytes) - val = value_read(desc[i].struct_offset_bytes, - desc[i].struct_size_bytes, - structure) << shift; - else - val = 0; - - mask = cpu_to_be32(((1ull << desc[i].size_bits) - 1) << shift); - addr = (__be32 *) buf + desc[i].offset_words; - *addr = (*addr & ~mask) | (cpu_to_be32(val) & mask); - } else if (desc[i].size_bits <= 64) { - int shift; - u64 val; - __be64 mask; - __be64 *addr; - - shift = 64 - desc[i].offset_bits - desc[i].size_bits; - if (desc[i].struct_size_bytes) - val = value_read(desc[i].struct_offset_bytes, - desc[i].struct_size_bytes, - structure) << shift; - else - val = 0; - - mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift); - addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words); - *addr = (*addr & ~mask) | (cpu_to_be64(val) & mask); - } else { - if (desc[i].offset_bits % 8 || - desc[i].size_bits % 8) { - pr_warn("Structure field %s of size %d bits is not byte-aligned\n", - desc[i].field_name, desc[i].size_bits); - } - - if (desc[i].struct_size_bytes) - memcpy((char *)buf + desc[i].offset_words * 4 + - desc[i].offset_bits / 8, - (char *)structure + desc[i].struct_offset_bytes, - desc[i].size_bits / 8); - else - memset((char *)buf + desc[i].offset_words * 4 + - desc[i].offset_bits / 8, - 0, - desc[i].size_bits / 8); - } - } -} -EXPORT_SYMBOL(ib_pack); - -static void value_write(int offset, int size, u64 val, void *structure) -{ - switch (size * 8) { - case 8: *( u8 *) ((char *)structure + offset) = val; break; - case 16: *(__be16 *) ((char *)structure + offset) = cpu_to_be16(val); break; - case 32: *(__be32 *) ((char *)structure + offset) = cpu_to_be32(val); break; - case 64: *(__be64 *) ((char *)structure + offset) = cpu_to_be64(val); break; - 
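/*
 * The <= 32-bit branch of ib_pack() above reduces to a masked
 * read-modify-write of one big-endian word.  A self-contained sketch of
 * that arithmetic (host byte order for brevity; pack_field32 is
 * illustrative, not part of this file):
 *
 *	static u32 pack_field32(u32 word, u32 val,
 *				int offset_bits, int size_bits)
 *	{
 *		int shift = 32 - offset_bits - size_bits;
 *		u32 mask = ((1ull << size_bits) - 1) << shift;
 *
 *		return (word & ~mask) | ((val << shift) & mask);
 *	}
 *
 * e.g. pack_field32(0, 5, 8, 4) == 0x00500000: a 4-bit field at wire bit
 * offset 8 (like LRH service_level) lands in bits 20..23 of the word.
 */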
default: - pr_warn("Field size %d bits not handled\n", size * 8); - } -} - -/** - * ib_unpack - Unpack a buffer into a structure - * @desc:Array of structure field descriptions - * @desc_len:Number of entries in @desc - * @buf:Buffer to unpack from - * @structure:Structure to unpack into - * - * ib_unpack() unpacks a list of structure fields from a buffer, - * controlled by the array of fields in @desc. - */ -void ib_unpack(const struct ib_field *desc, - int desc_len, - void *buf, - void *structure) -{ - int i; - - for (i = 0; i < desc_len; ++i) { - if (!desc[i].struct_size_bytes) - continue; - - if (desc[i].size_bits <= 32) { - int shift; - u32 val; - u32 mask; - __be32 *addr; - - shift = 32 - desc[i].offset_bits - desc[i].size_bits; - mask = ((1ull << desc[i].size_bits) - 1) << shift; - addr = (__be32 *) buf + desc[i].offset_words; - val = (be32_to_cpup(addr) & mask) >> shift; - value_write(desc[i].struct_offset_bytes, - desc[i].struct_size_bytes, - val, - structure); - } else if (desc[i].size_bits <= 64) { - int shift; - u64 val; - u64 mask; - __be64 *addr; - - shift = 64 - desc[i].offset_bits - desc[i].size_bits; - mask = (~0ull >> (64 - desc[i].size_bits)) << shift; - addr = (__be64 *) buf + desc[i].offset_words; - val = (be64_to_cpup(addr) & mask) >> shift; - value_write(desc[i].struct_offset_bytes, - desc[i].struct_size_bytes, - val, - structure); - } else { - if (desc[i].offset_bits % 8 || - desc[i].size_bits % 8) { - pr_warn("Structure field %s of size %d bits is not byte-aligned\n", - desc[i].field_name, desc[i].size_bits); - } - - memcpy((char *)structure + desc[i].struct_offset_bytes, - (char *)buf + desc[i].offset_words * 4 + - desc[i].offset_bits / 8, - desc[i].size_bits / 8); - } - } -} -EXPORT_SYMBOL(ib_unpack); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/packer.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ud_header.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ud_header.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ud_header.c (nonexistent) @@ -1,547 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include - -#include - -#include - -#define STRUCT_FIELD(header, field) \ - .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ - .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \ - .field_name = #header ":" #field - -static const struct ib_field lrh_table[] = { - { STRUCT_FIELD(lrh, virtual_lane), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 4 }, - { STRUCT_FIELD(lrh, link_version), - .offset_words = 0, - .offset_bits = 4, - .size_bits = 4 }, - { STRUCT_FIELD(lrh, service_level), - .offset_words = 0, - .offset_bits = 8, - .size_bits = 4 }, - { RESERVED, - .offset_words = 0, - .offset_bits = 12, - .size_bits = 2 }, - { STRUCT_FIELD(lrh, link_next_header), - .offset_words = 0, - .offset_bits = 14, - .size_bits = 2 }, - { STRUCT_FIELD(lrh, destination_lid), - .offset_words = 0, - .offset_bits = 16, - .size_bits = 16 }, - { RESERVED, - .offset_words = 1, - .offset_bits = 0, - .size_bits = 5 }, - { STRUCT_FIELD(lrh, packet_length), - .offset_words = 1, - .offset_bits = 5, - .size_bits = 11 }, - { STRUCT_FIELD(lrh, source_lid), - .offset_words = 1, - .offset_bits = 16, - .size_bits = 16 } -}; - -static const struct ib_field eth_table[] = { - { STRUCT_FIELD(eth, dmac_h), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 32 }, - { STRUCT_FIELD(eth, dmac_l), - .offset_words = 1, - .offset_bits = 0, - .size_bits = 16 }, - { STRUCT_FIELD(eth, smac_h), - .offset_words = 1, - .offset_bits = 16, - .size_bits = 16 }, - { STRUCT_FIELD(eth, smac_l), - .offset_words = 2, - .offset_bits = 0, - .size_bits = 32 }, - { STRUCT_FIELD(eth, type), - .offset_words = 3, - .offset_bits = 0, - .size_bits = 16 } -}; - -static const struct ib_field vlan_table[] = { - { STRUCT_FIELD(vlan, tag), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 16 }, - { STRUCT_FIELD(vlan, type), - .offset_words = 0, - .offset_bits = 16, - .size_bits = 16 } -}; - -static const struct ib_field ip4_table[] = { - { STRUCT_FIELD(ip4, ver), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 4 }, - { STRUCT_FIELD(ip4, hdr_len), - .offset_words = 0, - .offset_bits = 4, - .size_bits = 4 }, - { STRUCT_FIELD(ip4, tos), - .offset_words = 0, - .offset_bits = 8, - .size_bits = 8 }, - { STRUCT_FIELD(ip4, tot_len), - .offset_words = 0, - .offset_bits = 16, - .size_bits = 16 }, - { STRUCT_FIELD(ip4, id), - .offset_words = 1, - .offset_bits = 0, - .size_bits = 16 }, - { STRUCT_FIELD(ip4, frag_off), - .offset_words = 1, - .offset_bits = 16, - .size_bits = 16 }, - { STRUCT_FIELD(ip4, ttl), - .offset_words = 2, - .offset_bits = 0, - .size_bits = 8 }, - { STRUCT_FIELD(ip4, protocol), - .offset_words = 2, - .offset_bits = 8, - .size_bits = 8 }, - { STRUCT_FIELD(ip4, check), - .offset_words = 2, - .offset_bits = 16, - .size_bits = 16 }, - { STRUCT_FIELD(ip4, saddr), - .offset_words = 3, - .offset_bits = 0, - .size_bits = 32 }, - { STRUCT_FIELD(ip4, daddr), - .offset_words = 4, - .offset_bits = 0, - .size_bits = 32 } -}; - -static const struct ib_field udp_table[] = { - { STRUCT_FIELD(udp, sport), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 16 }, - { STRUCT_FIELD(udp, dport), - .offset_words = 0, - .offset_bits = 16, - .size_bits = 16 }, - { STRUCT_FIELD(udp, length), - 
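/*
 * Each entry in these tables ties one struct member to an absolute wire
 * position.  For instance { STRUCT_FIELD(udp, sport), .offset_words = 0,
 * ... } expands to roughly:
 *
 *	{ .struct_offset_bytes = offsetof(struct ib_unpacked_udp, sport),
 *	  .struct_size_bytes   = sizeof(((struct ib_unpacked_udp *)0)->sport),
 *	  .field_name          = "udp:sport",
 *	  .offset_words        = 0,
 *	  .offset_bits         = 0,
 *	  .size_bits           = 16 },
 *
 * so ib_pack()/ib_unpack() can serialize any of these headers from the
 * same generic loop, with no per-header marshalling code.
 */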
.offset_words = 1, - .offset_bits = 0, - .size_bits = 16 }, - { STRUCT_FIELD(udp, csum), - .offset_words = 1, - .offset_bits = 16, - .size_bits = 16 } -}; - -static const struct ib_field grh_table[] = { - { STRUCT_FIELD(grh, ip_version), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 4 }, - { STRUCT_FIELD(grh, traffic_class), - .offset_words = 0, - .offset_bits = 4, - .size_bits = 8 }, - { STRUCT_FIELD(grh, flow_label), - .offset_words = 0, - .offset_bits = 12, - .size_bits = 20 }, - { STRUCT_FIELD(grh, payload_length), - .offset_words = 1, - .offset_bits = 0, - .size_bits = 16 }, - { STRUCT_FIELD(grh, next_header), - .offset_words = 1, - .offset_bits = 16, - .size_bits = 8 }, - { STRUCT_FIELD(grh, hop_limit), - .offset_words = 1, - .offset_bits = 24, - .size_bits = 8 }, - { STRUCT_FIELD(grh, source_gid), - .offset_words = 2, - .offset_bits = 0, - .size_bits = 128 }, - { STRUCT_FIELD(grh, destination_gid), - .offset_words = 6, - .offset_bits = 0, - .size_bits = 128 } -}; - -static const struct ib_field bth_table[] = { - { STRUCT_FIELD(bth, opcode), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 8 }, - { STRUCT_FIELD(bth, solicited_event), - .offset_words = 0, - .offset_bits = 8, - .size_bits = 1 }, - { STRUCT_FIELD(bth, mig_req), - .offset_words = 0, - .offset_bits = 9, - .size_bits = 1 }, - { STRUCT_FIELD(bth, pad_count), - .offset_words = 0, - .offset_bits = 10, - .size_bits = 2 }, - { STRUCT_FIELD(bth, transport_header_version), - .offset_words = 0, - .offset_bits = 12, - .size_bits = 4 }, - { STRUCT_FIELD(bth, pkey), - .offset_words = 0, - .offset_bits = 16, - .size_bits = 16 }, - { RESERVED, - .offset_words = 1, - .offset_bits = 0, - .size_bits = 8 }, - { STRUCT_FIELD(bth, destination_qpn), - .offset_words = 1, - .offset_bits = 8, - .size_bits = 24 }, - { STRUCT_FIELD(bth, ack_req), - .offset_words = 2, - .offset_bits = 0, - .size_bits = 1 }, - { RESERVED, - .offset_words = 2, - .offset_bits = 1, - .size_bits = 7 }, - { STRUCT_FIELD(bth, psn), - .offset_words = 2, - .offset_bits = 8, - .size_bits = 24 } -}; - -static const struct ib_field deth_table[] = { - { STRUCT_FIELD(deth, qkey), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 32 }, - { RESERVED, - .offset_words = 1, - .offset_bits = 0, - .size_bits = 8 }, - { STRUCT_FIELD(deth, source_qpn), - .offset_words = 1, - .offset_bits = 8, - .size_bits = 24 } -}; - -__sum16 ib_ud_ip4_csum(struct ib_ud_header *header) -{ - struct ip iph; - - iph.ip_hl = 5; - iph.ip_v = 4; - iph.ip_tos = header->ip4.tos; - iph.ip_len = header->ip4.tot_len; - iph.ip_id = header->ip4.id; - iph.ip_off = header->ip4.frag_off; - iph.ip_ttl = header->ip4.ttl; - iph.ip_p = header->ip4.protocol; - iph.ip_sum = 0; - iph.ip_src.s_addr = header->ip4.saddr; - iph.ip_dst.s_addr = header->ip4.daddr; - - return in_cksum_hdr(&iph); -} -EXPORT_SYMBOL(ib_ud_ip4_csum); - -/** - * ib_ud_header_init - Initialize UD header structure - * @payload_bytes:Length of packet payload - * @lrh_present: specify if LRH is present - * @eth_present: specify if Eth header is present - * @vlan_present: packet is tagged vlan - * @grh_present: GRH flag (if non-zero, GRH will be included) - * @ip_version: if non-zero, IP header, V4 or V6, will be included - * @udp_present :if non-zero, UDP header will be included - * @immediate_present: specify if immediate data is present - * @header:Structure to initialize - */ -int ib_ud_header_init(int payload_bytes, - int lrh_present, - int eth_present, - int vlan_present, - int grh_present, - int ip_version, - int udp_present, - 
int immediate_present, - struct ib_ud_header *header) -{ - size_t udp_bytes = udp_present ? IB_UDP_BYTES : 0; - - grh_present = grh_present && !ip_version; - memset(header, 0, sizeof *header); - - /* - * UDP header without IP header doesn't make sense - */ - if (udp_present && ip_version != 4 && ip_version != 6) - return -EINVAL; - - if (lrh_present) { - u16 packet_length; - - header->lrh.link_version = 0; - header->lrh.link_next_header = - grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; - packet_length = (IB_LRH_BYTES + - IB_BTH_BYTES + - IB_DETH_BYTES + - (grh_present ? IB_GRH_BYTES : 0) + - payload_bytes + - 4 + /* ICRC */ - 3) / 4; /* round up */ - header->lrh.packet_length = cpu_to_be16(packet_length); - } - - if (vlan_present) - header->eth.type = cpu_to_be16(ETH_P_8021Q); - - if (ip_version == 6 || grh_present) { - header->grh.ip_version = 6; - header->grh.payload_length = - cpu_to_be16((udp_bytes + - IB_BTH_BYTES + - IB_DETH_BYTES + - payload_bytes + - 4 + /* ICRC */ - 3) & ~3); /* round up */ - header->grh.next_header = udp_present ? IPPROTO_UDP : 0x1b; - } - - if (ip_version == 4) { - header->ip4.ver = 4; /* version 4 */ - header->ip4.hdr_len = 5; /* 5 words */ - header->ip4.tot_len = - cpu_to_be16(IB_IP4_BYTES + - udp_bytes + - IB_BTH_BYTES + - IB_DETH_BYTES + - payload_bytes + - 4); /* ICRC */ - header->ip4.protocol = IPPROTO_UDP; - } - if (udp_present && ip_version) - header->udp.length = - cpu_to_be16(IB_UDP_BYTES + - IB_BTH_BYTES + - IB_DETH_BYTES + - payload_bytes + - 4); /* ICRC */ - - if (immediate_present) - header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; - else - header->bth.opcode = IB_OPCODE_UD_SEND_ONLY; - header->bth.pad_count = (4 - payload_bytes) & 3; - header->bth.transport_header_version = 0; - - header->lrh_present = lrh_present; - header->eth_present = eth_present; - header->vlan_present = vlan_present; - header->grh_present = grh_present || (ip_version == 6); - header->ipv4_present = ip_version == 4; - header->udp_present = udp_present; - header->immediate_present = immediate_present; - return 0; -} -EXPORT_SYMBOL(ib_ud_header_init); - -/** - * ib_ud_header_pack - Pack UD header struct into wire format - * @header:UD header struct - * @buf:Buffer to pack into - * - * ib_ud_header_pack() packs the UD header structure @header into wire - * format in the buffer @buf. 
- */ -int ib_ud_header_pack(struct ib_ud_header *header, - void *buf) -{ - int len = 0; - - if (header->lrh_present) { - ib_pack(lrh_table, ARRAY_SIZE(lrh_table), - &header->lrh, (char *)buf + len); - len += IB_LRH_BYTES; - } - if (header->eth_present) { - ib_pack(eth_table, ARRAY_SIZE(eth_table), - &header->eth, (char *)buf + len); - len += IB_ETH_BYTES; - } - if (header->vlan_present) { - ib_pack(vlan_table, ARRAY_SIZE(vlan_table), - &header->vlan, (char *)buf + len); - len += IB_VLAN_BYTES; - } - if (header->grh_present) { - ib_pack(grh_table, ARRAY_SIZE(grh_table), - &header->grh, (char *)buf + len); - len += IB_GRH_BYTES; - } - if (header->ipv4_present) { - ib_pack(ip4_table, ARRAY_SIZE(ip4_table), - &header->ip4, (char *)buf + len); - len += IB_IP4_BYTES; - } - if (header->udp_present) { - ib_pack(udp_table, ARRAY_SIZE(udp_table), - &header->udp, (char *)buf + len); - len += IB_UDP_BYTES; - } - - ib_pack(bth_table, ARRAY_SIZE(bth_table), - &header->bth, (char *)buf + len); - len += IB_BTH_BYTES; - - ib_pack(deth_table, ARRAY_SIZE(deth_table), - &header->deth, (char *)buf + len); - len += IB_DETH_BYTES; - - if (header->immediate_present) { - memcpy((char *)buf + len, &header->immediate_data, sizeof header->immediate_data); - len += sizeof header->immediate_data; - } - - return len; -} -EXPORT_SYMBOL(ib_ud_header_pack); - -/** - * ib_ud_header_unpack - Unpack UD header struct from wire format - * @header:UD header struct - * @buf:Buffer to unpack from - * - * ib_ud_header_unpack() unpacks the UD header structure @header from wire - * format in the buffer @buf. - */ -int ib_ud_header_unpack(void *buf, - struct ib_ud_header *header) -{ - ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), - buf, &header->lrh); - buf = (char *)buf + IB_LRH_BYTES; - - if (header->lrh.link_version != 0) { - pr_warn("Invalid LRH.link_version %d\n", - header->lrh.link_version); - return -EINVAL; - } - - switch (header->lrh.link_next_header) { - case IB_LNH_IBA_LOCAL: - header->grh_present = 0; - break; - - case IB_LNH_IBA_GLOBAL: - header->grh_present = 1; - ib_unpack(grh_table, ARRAY_SIZE(grh_table), - buf, &header->grh); - buf = (char *)buf + IB_GRH_BYTES; - - if (header->grh.ip_version != 6) { - pr_warn("Invalid GRH.ip_version %d\n", - header->grh.ip_version); - return -EINVAL; - } - if (header->grh.next_header != 0x1b) { - pr_warn("Invalid GRH.next_header 0x%02x\n", - header->grh.next_header); - return -EINVAL; - } - break; - - default: - pr_warn("Invalid LRH.link_next_header %d\n", - header->lrh.link_next_header); - return -EINVAL; - } - - ib_unpack(bth_table, ARRAY_SIZE(bth_table), - buf, &header->bth); - buf = (char *)buf + IB_BTH_BYTES; - - switch (header->bth.opcode) { - case IB_OPCODE_UD_SEND_ONLY: - header->immediate_present = 0; - break; - case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE: - header->immediate_present = 1; - break; - default: - pr_warn("Invalid BTH.opcode 0x%02x\n", header->bth.opcode); - return -EINVAL; - } - - if (header->bth.transport_header_version != 0) { - pr_warn("Invalid BTH.transport_header_version %d\n", - header->bth.transport_header_version); - return -EINVAL; - } - - ib_unpack(deth_table, ARRAY_SIZE(deth_table), - buf, &header->deth); - buf = (char *)buf + IB_DETH_BYTES; - - if (header->immediate_present) - memcpy(&header->immediate_data, buf, sizeof header->immediate_data); - - return 0; -} -EXPORT_SYMBOL(ib_ud_header_unpack); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ud_header.c ___________________________________________________________________
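These helpers are normally used as a pair: ib_ud_header_init() sizes and flags the header for whichever layers are present, then ib_ud_header_pack() serializes it in front of the payload. A minimal consumer-side sketch for a plain IB UD send (LRH + BTH + DETH, no Ethernet/VLAN/GRH/IP/UDP, no immediate data); dlid and qpn stand in for caller-supplied values:

	struct ib_ud_header hdr;
	u8 buf[IB_LRH_BYTES + IB_BTH_BYTES + IB_DETH_BYTES];
	int len;

	/* 256-byte payload; only lrh_present is set */
	if (ib_ud_header_init(256, 1, 0, 0, 0, 0, 0, 0, &hdr))
		return;				/* invalid combination */

	hdr.lrh.destination_lid = cpu_to_be16(dlid);
	hdr.bth.destination_qpn = cpu_to_be32(qpn);

	len = ib_ud_header_pack(&hdr, buf);	/* wire bytes in buf[0..len) */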
Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/sa_query.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/sa_query.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/sa_query.c (nonexistent) @@ -1,1580 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "sa.h" -#include "core_priv.h" - -#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100 -#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000 -#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000 - -struct ib_sa_sm_ah { - struct ib_ah *ah; - struct kref ref; - u16 pkey_index; - u8 src_path_mask; -}; - -struct ib_sa_classport_cache { - bool valid; - struct ib_class_port_info data; -}; - -struct ib_sa_port { - struct ib_mad_agent *agent; - struct ib_sa_sm_ah *sm_ah; - struct work_struct update_task; - struct ib_sa_classport_cache classport_info; - spinlock_t classport_lock; /* protects class port info set */ - spinlock_t ah_lock; - u8 port_num; -}; - -struct ib_sa_device { - int start_port, end_port; - struct ib_event_handler event_handler; - struct ib_sa_port port[0]; -}; - -struct ib_sa_query { - void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); - void (*release)(struct ib_sa_query *); - struct ib_sa_client *client; - struct ib_sa_port *port; - struct ib_mad_send_buf *mad_buf; - struct ib_sa_sm_ah *sm_ah; - int id; - u32 flags; - struct list_head list; /* Local svc request list */ - u32 seq; /* Local svc request sequence number */ - unsigned long timeout; /* Local svc timeout */ - u8 path_use; /* How will the pathrecord be used */ -}; - -#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 -#define IB_SA_CANCEL 0x00000002 - -struct ib_sa_service_query { - void (*callback)(int, struct ib_sa_service_rec *, void *); - void *context; - struct ib_sa_query sa_query; -}; - -struct ib_sa_path_query { - void (*callback)(int, struct ib_sa_path_rec *, void *); - void *context; - struct ib_sa_query sa_query; -}; - -struct ib_sa_guidinfo_query { - void (*callback)(int, struct ib_sa_guidinfo_rec *, void *); - void *context; - struct ib_sa_query sa_query; -}; - -struct ib_sa_classport_info_query { - void (*callback)(int, struct ib_class_port_info *, void *); - void *context; - struct ib_sa_query sa_query; -}; - -struct ib_sa_mcmember_query { - void (*callback)(int, struct ib_sa_mcmember_rec *, void *); - void *context; - struct ib_sa_query sa_query; -}; - -static void ib_sa_add_one(struct ib_device *device); -static void ib_sa_remove_one(struct ib_device *device, void *client_data); - -static struct ib_client sa_client = { - .name = "sa", - .add = ib_sa_add_one, - .remove = ib_sa_remove_one -}; - -static DEFINE_SPINLOCK(idr_lock); -static DEFINE_IDR(query_idr); - -static DEFINE_SPINLOCK(tid_lock); -static u32 tid; - -#define PATH_REC_FIELD(field) \ - .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \ - .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \ - .field_name = "sa_path_rec:" #field - -static const struct ib_field path_rec_table[] = { - { PATH_REC_FIELD(service_id), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 64 }, - { PATH_REC_FIELD(dgid), - .offset_words = 2, - .offset_bits = 0, - .size_bits = 128 }, - { PATH_REC_FIELD(sgid), - .offset_words = 6, - .offset_bits = 0, - .size_bits = 128 }, - { PATH_REC_FIELD(dlid), - .offset_words = 10, - .offset_bits = 0, - .size_bits = 16 }, - { PATH_REC_FIELD(slid), - .offset_words = 10, - .offset_bits = 16, - .size_bits = 16 }, - { PATH_REC_FIELD(raw_traffic), - .offset_words = 11, - .offset_bits = 0, - .size_bits = 1 }, - { RESERVED, - .offset_words = 11, - .offset_bits = 1, - .size_bits = 3 }, - { PATH_REC_FIELD(flow_label), - .offset_words = 11, - .offset_bits = 4, - 
.size_bits = 20 }, - { PATH_REC_FIELD(hop_limit), - .offset_words = 11, - .offset_bits = 24, - .size_bits = 8 }, - { PATH_REC_FIELD(traffic_class), - .offset_words = 12, - .offset_bits = 0, - .size_bits = 8 }, - { PATH_REC_FIELD(reversible), - .offset_words = 12, - .offset_bits = 8, - .size_bits = 1 }, - { PATH_REC_FIELD(numb_path), - .offset_words = 12, - .offset_bits = 9, - .size_bits = 7 }, - { PATH_REC_FIELD(pkey), - .offset_words = 12, - .offset_bits = 16, - .size_bits = 16 }, - { PATH_REC_FIELD(qos_class), - .offset_words = 13, - .offset_bits = 0, - .size_bits = 12 }, - { PATH_REC_FIELD(sl), - .offset_words = 13, - .offset_bits = 12, - .size_bits = 4 }, - { PATH_REC_FIELD(mtu_selector), - .offset_words = 13, - .offset_bits = 16, - .size_bits = 2 }, - { PATH_REC_FIELD(mtu), - .offset_words = 13, - .offset_bits = 18, - .size_bits = 6 }, - { PATH_REC_FIELD(rate_selector), - .offset_words = 13, - .offset_bits = 24, - .size_bits = 2 }, - { PATH_REC_FIELD(rate), - .offset_words = 13, - .offset_bits = 26, - .size_bits = 6 }, - { PATH_REC_FIELD(packet_life_time_selector), - .offset_words = 14, - .offset_bits = 0, - .size_bits = 2 }, - { PATH_REC_FIELD(packet_life_time), - .offset_words = 14, - .offset_bits = 2, - .size_bits = 6 }, - { PATH_REC_FIELD(preference), - .offset_words = 14, - .offset_bits = 8, - .size_bits = 8 }, - { RESERVED, - .offset_words = 14, - .offset_bits = 16, - .size_bits = 48 }, -}; - -#define MCMEMBER_REC_FIELD(field) \ - .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \ - .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \ - .field_name = "sa_mcmember_rec:" #field - -static const struct ib_field mcmember_rec_table[] = { - { MCMEMBER_REC_FIELD(mgid), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 128 }, - { MCMEMBER_REC_FIELD(port_gid), - .offset_words = 4, - .offset_bits = 0, - .size_bits = 128 }, - { MCMEMBER_REC_FIELD(qkey), - .offset_words = 8, - .offset_bits = 0, - .size_bits = 32 }, - { MCMEMBER_REC_FIELD(mlid), - .offset_words = 9, - .offset_bits = 0, - .size_bits = 16 }, - { MCMEMBER_REC_FIELD(mtu_selector), - .offset_words = 9, - .offset_bits = 16, - .size_bits = 2 }, - { MCMEMBER_REC_FIELD(mtu), - .offset_words = 9, - .offset_bits = 18, - .size_bits = 6 }, - { MCMEMBER_REC_FIELD(traffic_class), - .offset_words = 9, - .offset_bits = 24, - .size_bits = 8 }, - { MCMEMBER_REC_FIELD(pkey), - .offset_words = 10, - .offset_bits = 0, - .size_bits = 16 }, - { MCMEMBER_REC_FIELD(rate_selector), - .offset_words = 10, - .offset_bits = 16, - .size_bits = 2 }, - { MCMEMBER_REC_FIELD(rate), - .offset_words = 10, - .offset_bits = 18, - .size_bits = 6 }, - { MCMEMBER_REC_FIELD(packet_life_time_selector), - .offset_words = 10, - .offset_bits = 24, - .size_bits = 2 }, - { MCMEMBER_REC_FIELD(packet_life_time), - .offset_words = 10, - .offset_bits = 26, - .size_bits = 6 }, - { MCMEMBER_REC_FIELD(sl), - .offset_words = 11, - .offset_bits = 0, - .size_bits = 4 }, - { MCMEMBER_REC_FIELD(flow_label), - .offset_words = 11, - .offset_bits = 4, - .size_bits = 20 }, - { MCMEMBER_REC_FIELD(hop_limit), - .offset_words = 11, - .offset_bits = 24, - .size_bits = 8 }, - { MCMEMBER_REC_FIELD(scope), - .offset_words = 12, - .offset_bits = 0, - .size_bits = 4 }, - { MCMEMBER_REC_FIELD(join_state), - .offset_words = 12, - .offset_bits = 4, - .size_bits = 4 }, - { MCMEMBER_REC_FIELD(proxy_join), - .offset_words = 12, - .offset_bits = 8, - .size_bits = 1 }, - { RESERVED, - .offset_words = 12, - .offset_bits = 9, - .size_bits = 23 }, -}; - -#define 
SERVICE_REC_FIELD(field) \ - .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \ - .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \ - .field_name = "sa_service_rec:" #field - -static const struct ib_field service_rec_table[] = { - { SERVICE_REC_FIELD(id), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 64 }, - { SERVICE_REC_FIELD(gid), - .offset_words = 2, - .offset_bits = 0, - .size_bits = 128 }, - { SERVICE_REC_FIELD(pkey), - .offset_words = 6, - .offset_bits = 0, - .size_bits = 16 }, - { SERVICE_REC_FIELD(lease), - .offset_words = 7, - .offset_bits = 0, - .size_bits = 32 }, - { SERVICE_REC_FIELD(key), - .offset_words = 8, - .offset_bits = 0, - .size_bits = 128 }, - { SERVICE_REC_FIELD(name), - .offset_words = 12, - .offset_bits = 0, - .size_bits = 64*8 }, - { SERVICE_REC_FIELD(data8), - .offset_words = 28, - .offset_bits = 0, - .size_bits = 16*8 }, - { SERVICE_REC_FIELD(data16), - .offset_words = 32, - .offset_bits = 0, - .size_bits = 8*16 }, - { SERVICE_REC_FIELD(data32), - .offset_words = 36, - .offset_bits = 0, - .size_bits = 4*32 }, - { SERVICE_REC_FIELD(data64), - .offset_words = 40, - .offset_bits = 0, - .size_bits = 2*64 }, -}; - -#define CLASSPORTINFO_REC_FIELD(field) \ - .struct_offset_bytes = offsetof(struct ib_class_port_info, field), \ - .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \ - .field_name = "ib_class_port_info:" #field - -static const struct ib_field classport_info_rec_table[] = { - { CLASSPORTINFO_REC_FIELD(base_version), - .offset_words = 0, - .offset_bits = 0, - .size_bits = 8 }, - { CLASSPORTINFO_REC_FIELD(class_version), - .offset_words = 0, - .offset_bits = 8, - .size_bits = 8 }, - { CLASSPORTINFO_REC_FIELD(capability_mask), - .offset_words = 0, - .offset_bits = 16, - .size_bits = 16 }, - { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time), - .offset_words = 1, - .offset_bits = 0, - .size_bits = 32 }, - { CLASSPORTINFO_REC_FIELD(redirect_gid), - .offset_words = 2, - .offset_bits = 0, - .size_bits = 128 }, - { CLASSPORTINFO_REC_FIELD(redirect_tcslfl), - .offset_words = 6, - .offset_bits = 0, - .size_bits = 32 }, - { CLASSPORTINFO_REC_FIELD(redirect_lid), - .offset_words = 7, - .offset_bits = 0, - .size_bits = 16 }, - { CLASSPORTINFO_REC_FIELD(redirect_pkey), - .offset_words = 7, - .offset_bits = 16, - .size_bits = 16 }, - - { CLASSPORTINFO_REC_FIELD(redirect_qp), - .offset_words = 8, - .offset_bits = 0, - .size_bits = 32 }, - { CLASSPORTINFO_REC_FIELD(redirect_qkey), - .offset_words = 9, - .offset_bits = 0, - .size_bits = 32 }, - - { CLASSPORTINFO_REC_FIELD(trap_gid), - .offset_words = 10, - .offset_bits = 0, - .size_bits = 128 }, - { CLASSPORTINFO_REC_FIELD(trap_tcslfl), - .offset_words = 14, - .offset_bits = 0, - .size_bits = 32 }, - - { CLASSPORTINFO_REC_FIELD(trap_lid), - .offset_words = 15, - .offset_bits = 0, - .size_bits = 16 }, - { CLASSPORTINFO_REC_FIELD(trap_pkey), - .offset_words = 15, - .offset_bits = 16, - .size_bits = 16 }, - - { CLASSPORTINFO_REC_FIELD(trap_hlqp), - .offset_words = 16, - .offset_bits = 0, - .size_bits = 32 }, - { CLASSPORTINFO_REC_FIELD(trap_qkey), - .offset_words = 17, - .offset_bits = 0, - .size_bits = 32 }, -}; - -#define GUIDINFO_REC_FIELD(field) \ - .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ - .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ - .field_name = "sa_guidinfo_rec:" #field - -static const struct ib_field guidinfo_rec_table[] = { - { GUIDINFO_REC_FIELD(lid), - .offset_words = 0, - .offset_bits = 0, - 
.size_bits = 16 }, - { GUIDINFO_REC_FIELD(block_num), - .offset_words = 0, - .offset_bits = 16, - .size_bits = 8 }, - { GUIDINFO_REC_FIELD(res1), - .offset_words = 0, - .offset_bits = 24, - .size_bits = 8 }, - { GUIDINFO_REC_FIELD(res2), - .offset_words = 1, - .offset_bits = 0, - .size_bits = 32 }, - { GUIDINFO_REC_FIELD(guid_info_list), - .offset_words = 2, - .offset_bits = 0, - .size_bits = 512 }, -}; - -static inline void ib_sa_disable_local_svc(struct ib_sa_query *query) -{ - query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE; -} - -static void free_sm_ah(struct kref *kref) -{ - struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); - - ib_destroy_ah(sm_ah->ah); - kfree(sm_ah); -} - -static void update_sm_ah(struct work_struct *work) -{ - struct ib_sa_port *port = - container_of(work, struct ib_sa_port, update_task); - struct ib_sa_sm_ah *new_ah; - struct ib_port_attr port_attr; - struct ib_ah_attr ah_attr; - - if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { - pr_warn("Couldn't query port\n"); - return; - } - - new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL); - if (!new_ah) { - return; - } - - kref_init(&new_ah->ref); - new_ah->src_path_mask = (1 << port_attr.lmc) - 1; - - new_ah->pkey_index = 0; - if (ib_find_pkey(port->agent->device, port->port_num, - IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index)) - pr_err("Couldn't find index for default PKey\n"); - - memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.dlid = port_attr.sm_lid; - ah_attr.sl = port_attr.sm_sl; - ah_attr.port_num = port->port_num; - if (port_attr.grh_required) { - ah_attr.ah_flags = IB_AH_GRH; - ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix); - ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID); - } - - new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); - if (IS_ERR(new_ah->ah)) { - pr_warn("Couldn't create new SM AH\n"); - kfree(new_ah); - return; - } - - spin_lock_irq(&port->ah_lock); - if (port->sm_ah) - kref_put(&port->sm_ah->ref, free_sm_ah); - port->sm_ah = new_ah; - spin_unlock_irq(&port->ah_lock); - -} - -static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event) -{ - if (event->event == IB_EVENT_PORT_ERR || - event->event == IB_EVENT_PORT_ACTIVE || - event->event == IB_EVENT_LID_CHANGE || - event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_SM_CHANGE || - event->event == IB_EVENT_CLIENT_REREGISTER) { - unsigned long flags; - struct ib_sa_device *sa_dev = - container_of(handler, typeof(*sa_dev), event_handler); - struct ib_sa_port *port = - &sa_dev->port[event->element.port_num - sa_dev->start_port]; - - if (!rdma_cap_ib_sa(handler->device, port->port_num)) - return; - - spin_lock_irqsave(&port->ah_lock, flags); - if (port->sm_ah) - kref_put(&port->sm_ah->ref, free_sm_ah); - port->sm_ah = NULL; - spin_unlock_irqrestore(&port->ah_lock, flags); - - if (event->event == IB_EVENT_SM_CHANGE || - event->event == IB_EVENT_CLIENT_REREGISTER || - event->event == IB_EVENT_LID_CHANGE) { - spin_lock_irqsave(&port->classport_lock, flags); - port->classport_info.valid = false; - spin_unlock_irqrestore(&port->classport_lock, flags); - } - queue_work(ib_wq, &sa_dev->port[event->element.port_num - - sa_dev->start_port].update_task); - } -} - -void ib_sa_register_client(struct ib_sa_client *client) -{ - atomic_set(&client->users, 1); - init_completion(&client->comp); -} -EXPORT_SYMBOL(ib_sa_register_client); - -void ib_sa_unregister_client(struct ib_sa_client *client) -{ - ib_sa_client_put(client); - 
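/*
 * Register/unregister uses the usual refcount-plus-completion teardown
 * idiom: every outstanding query holds a reference via ib_sa_client_get(),
 * the final ib_sa_client_put() fires client->comp, and the wait below
 * therefore blocks until all queries issued through this client have
 * finished.  Schematically (the helpers live in sa.h):
 *
 *	get:	atomic_inc(&client->users);
 *	put:	if (atomic_dec_and_test(&client->users))
 *			complete(&client->comp);
 */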
wait_for_completion(&client->comp); -} -EXPORT_SYMBOL(ib_sa_unregister_client); - -/** - * ib_sa_cancel_query - try to cancel an SA query - * @id:ID of query to cancel - * @query:query pointer to cancel - * - * Try to cancel an SA query. If the id and query don't match up or - * the query has already completed, nothing is done. Otherwise the - * query is canceled and will complete with a status of -EINTR. - */ -void ib_sa_cancel_query(int id, struct ib_sa_query *query) -{ - unsigned long flags; - struct ib_mad_agent *agent; - struct ib_mad_send_buf *mad_buf; - - spin_lock_irqsave(&idr_lock, flags); - if (idr_find(&query_idr, id) != query) { - spin_unlock_irqrestore(&idr_lock, flags); - return; - } - agent = query->port->agent; - mad_buf = query->mad_buf; - spin_unlock_irqrestore(&idr_lock, flags); - - ib_cancel_mad(agent, mad_buf); -} -EXPORT_SYMBOL(ib_sa_cancel_query); - -static u8 get_src_path_mask(struct ib_device *device, u8 port_num) -{ - struct ib_sa_device *sa_dev; - struct ib_sa_port *port; - unsigned long flags; - u8 src_path_mask; - - sa_dev = ib_get_client_data(device, &sa_client); - if (!sa_dev) - return 0x7f; - - port = &sa_dev->port[port_num - sa_dev->start_port]; - spin_lock_irqsave(&port->ah_lock, flags); - src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f; - spin_unlock_irqrestore(&port->ah_lock, flags); - - return src_path_mask; -} - -int ib_init_ah_from_path(struct ib_device *device, u8 port_num, - struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr) -{ - int ret; - u16 gid_index; - int use_roce; - struct net_device *ndev = NULL; - - memset(ah_attr, 0, sizeof *ah_attr); - ah_attr->dlid = be16_to_cpu(rec->dlid); - ah_attr->sl = rec->sl; - ah_attr->src_path_bits = be16_to_cpu(rec->slid) & - get_src_path_mask(device, port_num); - ah_attr->port_num = port_num; - ah_attr->static_rate = rec->rate; - - use_roce = rdma_cap_eth_ah(device, port_num); - - if (use_roce) { - struct net_device *idev; - struct net_device *resolved_dev; - struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex, - .net = rec->net ?
rec->net : - &init_net}; - union { - struct sockaddr _sockaddr; - struct sockaddr_in _sockaddr_in; - struct sockaddr_in6 _sockaddr_in6; - } sgid_addr, dgid_addr; - - if (!device->get_netdev) - return -EOPNOTSUPP; - - rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid); - rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid); - - /* validate the route */ - ret = rdma_resolve_ip_route(&sgid_addr._sockaddr, - &dgid_addr._sockaddr, &dev_addr); - if (ret) - return ret; - - if ((dev_addr.network == RDMA_NETWORK_IPV4 || - dev_addr.network == RDMA_NETWORK_IPV6) && - rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) - return -EINVAL; - - idev = device->get_netdev(device, port_num); - if (!idev) - return -ENODEV; - - resolved_dev = dev_get_by_index(dev_addr.net, - dev_addr.bound_dev_if); - if (resolved_dev->if_flags & IFF_LOOPBACK) { - dev_put(resolved_dev); - resolved_dev = idev; - dev_hold(resolved_dev); - } - ndev = ib_get_ndev_from_path(rec); - rcu_read_lock(); - if ((ndev && ndev != resolved_dev) || - (resolved_dev != idev && - !rdma_is_upper_dev_rcu(idev, resolved_dev))) - ret = -EHOSTUNREACH; - rcu_read_unlock(); - dev_put(idev); - dev_put(resolved_dev); - if (ret) { - if (ndev) - dev_put(ndev); - return ret; - } - } - - if (rec->hop_limit > 0 || use_roce) { - ah_attr->ah_flags = IB_AH_GRH; - ah_attr->grh.dgid = rec->dgid; - - ret = ib_find_cached_gid_by_port(device, &rec->sgid, - rec->gid_type, port_num, ndev, - &gid_index); - if (ret) { - if (ndev) - dev_put(ndev); - return ret; - } - - ah_attr->grh.sgid_index = gid_index; - ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); - ah_attr->grh.hop_limit = rec->hop_limit; - ah_attr->grh.traffic_class = rec->traffic_class; - if (ndev) - dev_put(ndev); - } - - if (use_roce) - memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); - - return 0; -} -EXPORT_SYMBOL(ib_init_ah_from_path); - -static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) -{ - unsigned long flags; - - spin_lock_irqsave(&query->port->ah_lock, flags); - if (!query->port->sm_ah) { - spin_unlock_irqrestore(&query->port->ah_lock, flags); - return -EAGAIN; - } - kref_get(&query->port->sm_ah->ref); - query->sm_ah = query->port->sm_ah; - spin_unlock_irqrestore(&query->port->ah_lock, flags); - - query->mad_buf = ib_create_send_mad(query->port->agent, 1, - query->sm_ah->pkey_index, - 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, - gfp_mask, - IB_MGMT_BASE_VERSION); - if (IS_ERR(query->mad_buf)) { - kref_put(&query->sm_ah->ref, free_sm_ah); - return -ENOMEM; - } - - query->mad_buf->ah = query->sm_ah->ah; - - return 0; -} - -static void free_mad(struct ib_sa_query *query) -{ - ib_free_send_mad(query->mad_buf); - kref_put(&query->sm_ah->ref, free_sm_ah); -} - -static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) -{ - unsigned long flags; - - memset(mad, 0, sizeof *mad); - - mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION; - mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; - mad->mad_hdr.class_version = IB_SA_CLASS_VERSION; - - spin_lock_irqsave(&tid_lock, flags); - mad->mad_hdr.tid = - cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++); - spin_unlock_irqrestore(&tid_lock, flags); -} - -static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) -{ - bool preload = gfpflags_allow_blocking(gfp_mask); - unsigned long flags; - int ret, id; - - if (preload) - idr_preload(gfp_mask); - spin_lock_irqsave(&idr_lock, flags); - - id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT); - - spin_unlock_irqrestore(&idr_lock, flags); - if (preload) - idr_preload_end(); - if (id < 0) - return id; - 
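/*
 * The idr_preload()/idr_alloc() pairing above is the standard pattern for
 * allocating an ID under a spinlock: any blocking allocation happens in
 * idr_preload() before idr_lock is taken, so the locked idr_alloc() can
 * run with GFP_NOWAIT.  The same shape in isolation (ptr is illustrative):
 *
 *	idr_preload(GFP_KERNEL);			// may sleep
 *	spin_lock_irqsave(&idr_lock, flags);
 *	id = idr_alloc(&query_idr, ptr, 0, 0, GFP_NOWAIT); // never sleeps
 *	spin_unlock_irqrestore(&idr_lock, flags);
 *	idr_preload_end();
 *
 * The id returned here doubles as the handle ib_sa_cancel_query() uses to
 * look the query up again in query_idr.
 */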
- query->mad_buf->timeout_ms = timeout_ms; - query->mad_buf->context[0] = query; - query->id = id; - - if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) { - ib_sa_disable_local_svc(query); - } - - ret = ib_post_send_mad(query->mad_buf, NULL); - if (ret) { - spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, id); - spin_unlock_irqrestore(&idr_lock, flags); - } - - /* - * It's not safe to dereference query any more, because the - * send may already have completed and freed the query in - * another context. - */ - return ret ? ret : id; -} - -void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec) -{ - ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec); -} -EXPORT_SYMBOL(ib_sa_unpack_path); - -void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute) -{ - ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute); -} -EXPORT_SYMBOL(ib_sa_pack_path); - -static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, - int status, - struct ib_sa_mad *mad) -{ - struct ib_sa_path_query *query = - container_of(sa_query, struct ib_sa_path_query, sa_query); - - if (mad) { - struct ib_sa_path_rec rec; - - ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), - mad->data, &rec); - rec.net = NULL; - rec.ifindex = 0; - rec.gid_type = IB_GID_TYPE_IB; - eth_zero_addr(rec.dmac); - query->callback(status, &rec, query->context); - } else - query->callback(status, NULL, query->context); -} - -static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) -{ - kfree(container_of(sa_query, struct ib_sa_path_query, sa_query)); -} - -/** - * ib_sa_path_rec_get - Start a Path get query - * @client:SA client - * @device:device to send query on - * @port_num: port number to send query on - * @rec:Path Record to send in query - * @comp_mask:component mask to send in query - * @timeout_ms:time to wait for response - * @gfp_mask:GFP mask to use for internal allocations - * @callback:function called when query completes, times out or is - * canceled - * @context:opaque user context passed to callback - * @sa_query:query context, used to cancel query - * - * Send a Path Record Get query to the SA to look up a path. The - * callback function will be called when the query completes (or - * fails); status is 0 for a successful response, -EINTR if the query - * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error - * occurred sending the query. The resp parameter of the callback is - * only valid if status is 0. - * - * If the return value of ib_sa_path_rec_get() is negative, it is an - * error code. Otherwise it is a query ID that can be used to cancel - * the query.
- */ -int ib_sa_path_rec_get(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - struct ib_sa_path_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_path_rec *resp, - void *context), - void *context, - struct ib_sa_query **sa_query) -{ - struct ib_sa_path_query *query; - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port; - struct ib_mad_agent *agent; - struct ib_sa_mad *mad; - int ret; - - if (!sa_dev) - return -ENODEV; - - port = &sa_dev->port[port_num - sa_dev->start_port]; - agent = port->agent; - - query = kzalloc(sizeof(*query), gfp_mask); - if (!query) - return -ENOMEM; - - query->sa_query.port = port; - ret = alloc_mad(&query->sa_query, gfp_mask); - if (ret) - goto err1; - - ib_sa_client_get(client); - query->sa_query.client = client; - query->callback = callback; - query->context = context; - - mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); - - query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; - query->sa_query.release = ib_sa_path_rec_release; - mad->mad_hdr.method = IB_MGMT_METHOD_GET; - mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); - mad->sa_hdr.comp_mask = comp_mask; - - ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data); - - *sa_query = &query->sa_query; - - query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE; - query->sa_query.mad_buf->context[1] = rec; - - ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); - if (ret < 0) - goto err2; - - return ret; - -err2: - *sa_query = NULL; - ib_sa_client_put(query->sa_query.client); - free_mad(&query->sa_query); - -err1: - kfree(query); - return ret; -} -EXPORT_SYMBOL(ib_sa_path_rec_get); - -static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, - int status, - struct ib_sa_mad *mad) -{ - struct ib_sa_service_query *query = - container_of(sa_query, struct ib_sa_service_query, sa_query); - - if (mad) { - struct ib_sa_service_rec rec; - - ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table), - mad->data, &rec); - query->callback(status, &rec, query->context); - } else - query->callback(status, NULL, query->context); -} - -static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) -{ - kfree(container_of(sa_query, struct ib_sa_service_query, sa_query)); -} - -/** - * ib_sa_service_rec_query - Start Service Record operation - * @client:SA client - * @device:device to send request on - * @port_num: port number to send request on - * @method:SA method - should be get, set, or delete - * @rec:Service Record to send in request - * @comp_mask:component mask to send in request - * @timeout_ms:time to wait for response - * @gfp_mask:GFP mask to use for internal allocations - * @callback:function called when request completes, times out or is - * canceled - * @context:opaque user context passed to callback - * @sa_query:request context, used to cancel request - * - * Send a Service Record set/get/delete to the SA to register, - * unregister or query a service record. - * The callback function will be called when the request completes (or - * fails); status is 0 for a successful response, -EINTR if the query - * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error - * occurred sending the query. The resp parameter of the callback is - * only valid if status is 0. - * - * If the return value of ib_sa_service_rec_query() is negative, it is an - * error code.
Otherwise it is a request ID that can be used to cancel - * the query. - */ -int ib_sa_service_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, u8 method, - struct ib_sa_service_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_service_rec *resp, - void *context), - void *context, - struct ib_sa_query **sa_query) -{ - struct ib_sa_service_query *query; - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port; - struct ib_mad_agent *agent; - struct ib_sa_mad *mad; - int ret; - - if (!sa_dev) - return -ENODEV; - - port = &sa_dev->port[port_num - sa_dev->start_port]; - agent = port->agent; - - if (method != IB_MGMT_METHOD_GET && - method != IB_MGMT_METHOD_SET && - method != IB_SA_METHOD_DELETE) - return -EINVAL; - - query = kzalloc(sizeof(*query), gfp_mask); - if (!query) - return -ENOMEM; - - query->sa_query.port = port; - ret = alloc_mad(&query->sa_query, gfp_mask); - if (ret) - goto err1; - - ib_sa_client_get(client); - query->sa_query.client = client; - query->callback = callback; - query->context = context; - - mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); - - query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; - query->sa_query.release = ib_sa_service_rec_release; - mad->mad_hdr.method = method; - mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); - mad->sa_hdr.comp_mask = comp_mask; - - ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), - rec, mad->data); - - *sa_query = &query->sa_query; - - ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); - if (ret < 0) - goto err2; - - return ret; - -err2: - *sa_query = NULL; - ib_sa_client_put(query->sa_query.client); - free_mad(&query->sa_query); - -err1: - kfree(query); - return ret; -} -EXPORT_SYMBOL(ib_sa_service_rec_query); - -static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, - int status, - struct ib_sa_mad *mad) -{ - struct ib_sa_mcmember_query *query = - container_of(sa_query, struct ib_sa_mcmember_query, sa_query); - - if (mad) { - struct ib_sa_mcmember_rec rec; - - ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), - mad->data, &rec); - query->callback(status, &rec, query->context); - } else - query->callback(status, NULL, query->context); -} - -static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) -{ - kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); -} - -int ib_sa_mcmember_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - u8 method, - struct ib_sa_mcmember_rec *rec, - ib_sa_comp_mask comp_mask, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_mcmember_rec *resp, - void *context), - void *context, - struct ib_sa_query **sa_query) -{ - struct ib_sa_mcmember_query *query; - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port; - struct ib_mad_agent *agent; - struct ib_sa_mad *mad; - int ret; - - if (!sa_dev) - return -ENODEV; - - port = &sa_dev->port[port_num - sa_dev->start_port]; - agent = port->agent; - - query = kzalloc(sizeof(*query), gfp_mask); - if (!query) - return -ENOMEM; - - query->sa_query.port = port; - ret = alloc_mad(&query->sa_query, gfp_mask); - if (ret) - goto err1; - - ib_sa_client_get(client); - query->sa_query.client = client; - query->callback = callback; - query->context = context; - - mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); - - 
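For orientation at this point in the file: ib_sa_path_rec_get(), ib_sa_service_rec_query(), the ib_sa_mcmember_rec_query() body in progress here, and the GUID-info and ClassPortInfo variants further on all assemble their query the same way; only the pack table, attribute ID, permitted methods, and callback/release thunks differ. The following is a condensed sketch of that shared skeleton, using "foo" as a placeholder record type -- foo_rec_table, IB_SA_ATTR_FOO_REC, and the ib_sa_foo_rec_* thunks are illustrative stand-ins, not symbols from this file:

	query = kzalloc(sizeof(*query), gfp_mask);
	if (!query)
		return -ENOMEM;

	query->sa_query.port = port;
	ret = alloc_mad(&query->sa_query, gfp_mask);	/* reserve a MAD send buffer */
	if (ret)
		goto err1;

	ib_sa_client_get(client);			/* pin the client while the query is live */
	query->sa_query.client = client;
	query->callback = callback;
	query->context = context;

	mad = query->sa_query.mad_buf->mad;
	init_mad(mad, agent);				/* fill the common SA MAD header */

	query->sa_query.callback = callback ? ib_sa_foo_rec_callback : NULL;
	query->sa_query.release = ib_sa_foo_rec_release;
	mad->mad_hdr.method = method;			/* GET, SET, or DELETE as each API allows */
	mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_FOO_REC);
	mad->sa_hdr.comp_mask = comp_mask;
	ib_pack(foo_rec_table, ARRAY_SIZE(foo_rec_table), rec, mad->data);

	*sa_query = &query->sa_query;
	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
	if (ret < 0)
		goto err2;
	return ret;					/* non-negative: query ID */

err2:							/* unwind in reverse order */
	*sa_query = NULL;
	ib_sa_client_put(query->sa_query.client);
	free_mad(&query->sa_query);
err1:
	kfree(query);
	return ret;

The actual function continues below with exactly this shape.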
query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; - query->sa_query.release = ib_sa_mcmember_rec_release; - mad->mad_hdr.method = method; - mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); - mad->sa_hdr.comp_mask = comp_mask; - - ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), - rec, mad->data); - - *sa_query = &query->sa_query; - - ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); - if (ret < 0) - goto err2; - - return ret; - -err2: - *sa_query = NULL; - ib_sa_client_put(query->sa_query.client); - free_mad(&query->sa_query); - -err1: - kfree(query); - return ret; -} - -/* Support GuidInfoRecord */ -static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, - int status, - struct ib_sa_mad *mad) -{ - struct ib_sa_guidinfo_query *query = - container_of(sa_query, struct ib_sa_guidinfo_query, sa_query); - - if (mad) { - struct ib_sa_guidinfo_rec rec; - - ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), - mad->data, &rec); - query->callback(status, &rec, query->context); - } else - query->callback(status, NULL, query->context); -} - -static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query) -{ - kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query)); -} - -int ib_sa_guid_info_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - struct ib_sa_guidinfo_rec *rec, - ib_sa_comp_mask comp_mask, u8 method, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_guidinfo_rec *resp, - void *context), - void *context, - struct ib_sa_query **sa_query) -{ - struct ib_sa_guidinfo_query *query; - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port; - struct ib_mad_agent *agent; - struct ib_sa_mad *mad; - int ret; - - if (!sa_dev) - return -ENODEV; - - if (method != IB_MGMT_METHOD_GET && - method != IB_MGMT_METHOD_SET && - method != IB_SA_METHOD_DELETE) { - return -EINVAL; - } - - port = &sa_dev->port[port_num - sa_dev->start_port]; - agent = port->agent; - - query = kzalloc(sizeof(*query), gfp_mask); - if (!query) - return -ENOMEM; - - query->sa_query.port = port; - ret = alloc_mad(&query->sa_query, gfp_mask); - if (ret) - goto err1; - - ib_sa_client_get(client); - query->sa_query.client = client; - query->callback = callback; - query->context = context; - - mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); - - query->sa_query.callback = callback ? 
ib_sa_guidinfo_rec_callback : NULL; - query->sa_query.release = ib_sa_guidinfo_rec_release; - - mad->mad_hdr.method = method; - mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC); - mad->sa_hdr.comp_mask = comp_mask; - - ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec, - mad->data); - - *sa_query = &query->sa_query; - - ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); - if (ret < 0) - goto err2; - - return ret; - -err2: - *sa_query = NULL; - ib_sa_client_put(query->sa_query.client); - free_mad(&query->sa_query); - -err1: - kfree(query); - return ret; -} -EXPORT_SYMBOL(ib_sa_guid_info_rec_query); - -/* Support get SA ClassPortInfo */ -static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, - int status, - struct ib_sa_mad *mad) -{ - unsigned long flags; - struct ib_sa_classport_info_query *query = - container_of(sa_query, struct ib_sa_classport_info_query, sa_query); - - if (mad) { - struct ib_class_port_info rec; - - ib_unpack(classport_info_rec_table, - ARRAY_SIZE(classport_info_rec_table), - mad->data, &rec); - - spin_lock_irqsave(&sa_query->port->classport_lock, flags); - if (!status && !sa_query->port->classport_info.valid) { - memcpy(&sa_query->port->classport_info.data, &rec, - sizeof(sa_query->port->classport_info.data)); - - sa_query->port->classport_info.valid = true; - } - spin_unlock_irqrestore(&sa_query->port->classport_lock, flags); - - query->callback(status, &rec, query->context); - } else { - query->callback(status, NULL, query->context); - } -} - -static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query) -{ - kfree(container_of(sa_query, struct ib_sa_classport_info_query, - sa_query)); -} - -int ib_sa_classport_info_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_class_port_info *resp, - void *context), - void *context, - struct ib_sa_query **sa_query) -{ - struct ib_sa_classport_info_query *query; - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port; - struct ib_mad_agent *agent; - struct ib_sa_mad *mad; - struct ib_class_port_info cached_class_port_info; - int ret; - unsigned long flags; - - if (!sa_dev) - return -ENODEV; - - port = &sa_dev->port[port_num - sa_dev->start_port]; - agent = port->agent; - - /* Use cached ClassPortInfo attribute if valid instead of sending mad */ - spin_lock_irqsave(&port->classport_lock, flags); - if (port->classport_info.valid && callback) { - memcpy(&cached_class_port_info, &port->classport_info.data, - sizeof(cached_class_port_info)); - spin_unlock_irqrestore(&port->classport_lock, flags); - callback(0, &cached_class_port_info, context); - return 0; - } - spin_unlock_irqrestore(&port->classport_lock, flags); - - query = kzalloc(sizeof(*query), gfp_mask); - if (!query) - return -ENOMEM; - - query->sa_query.port = port; - ret = alloc_mad(&query->sa_query, gfp_mask); - if (ret) - goto err1; - - ib_sa_client_get(client); - query->sa_query.client = client; - query->callback = callback; - query->context = context; - - mad = query->sa_query.mad_buf->mad; - init_mad(mad, agent); - - query->sa_query.callback = callback ? 
ib_sa_classport_info_rec_callback : NULL; - - query->sa_query.release = ib_sa_portclass_info_rec_release; - /* support GET only */ - mad->mad_hdr.method = IB_MGMT_METHOD_GET; - mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO); - mad->sa_hdr.comp_mask = 0; - *sa_query = &query->sa_query; - - ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); - if (ret < 0) - goto err2; - - return ret; - -err2: - *sa_query = NULL; - ib_sa_client_put(query->sa_query.client); - free_mad(&query->sa_query); - -err1: - kfree(query); - return ret; -} -EXPORT_SYMBOL(ib_sa_classport_info_rec_query); - -static void send_handler(struct ib_mad_agent *agent, - struct ib_mad_send_wc *mad_send_wc) -{ - struct ib_sa_query *query = mad_send_wc->send_buf->context[0]; - unsigned long flags; - - if (query->callback) - switch (mad_send_wc->status) { - case IB_WC_SUCCESS: - /* No callback -- already got recv */ - break; - case IB_WC_RESP_TIMEOUT_ERR: - query->callback(query, -ETIMEDOUT, NULL); - break; - case IB_WC_WR_FLUSH_ERR: - query->callback(query, -EINTR, NULL); - break; - default: - query->callback(query, -EIO, NULL); - break; - } - - spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, query->id); - spin_unlock_irqrestore(&idr_lock, flags); - - free_mad(query); - ib_sa_client_put(query->client); - query->release(query); -} - -static void recv_handler(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_sa_query *query; - - if (!send_buf) - return; - - query = send_buf->context[0]; - if (query->callback) { - if (mad_recv_wc->wc->status == IB_WC_SUCCESS) - query->callback(query, - mad_recv_wc->recv_buf.mad->mad_hdr.status ? - -EINVAL : 0, - (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad); - else - query->callback(query, -EIO, NULL); - } - - ib_free_recv_mad(mad_recv_wc); -} - -static void ib_sa_add_one(struct ib_device *device) -{ - struct ib_sa_device *sa_dev; - int s, e, i; - int count = 0; - - s = rdma_start_port(device); - e = rdma_end_port(device); - - sa_dev = kzalloc(sizeof *sa_dev + - (e - s + 1) * sizeof (struct ib_sa_port), - GFP_KERNEL); - if (!sa_dev) - return; - - sa_dev->start_port = s; - sa_dev->end_port = e; - - for (i = 0; i <= e - s; ++i) { - spin_lock_init(&sa_dev->port[i].ah_lock); - if (!rdma_cap_ib_sa(device, i + 1)) - continue; - - sa_dev->port[i].sm_ah = NULL; - sa_dev->port[i].port_num = i + s; - - spin_lock_init(&sa_dev->port[i].classport_lock); - sa_dev->port[i].classport_info.valid = false; - - sa_dev->port[i].agent = - ib_register_mad_agent(device, i + s, IB_QPT_GSI, - NULL, 0, send_handler, - recv_handler, sa_dev, 0); - if (IS_ERR(sa_dev->port[i].agent)) - goto err; - - INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); - - count++; - } - - if (!count) - goto free; - - ib_set_client_data(device, &sa_client, sa_dev); - - /* - * We register our event handler after everything is set up, - * and then update our cached info after the event handler is - * registered to avoid any problems if a port changes state - * during our initialization. 
- */ - - INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event); - if (ib_register_event_handler(&sa_dev->event_handler)) - goto err; - - for (i = 0; i <= e - s; ++i) { - if (rdma_cap_ib_sa(device, i + 1)) - update_sm_ah(&sa_dev->port[i].update_task); - } - - return; - -err: - while (--i >= 0) { - if (rdma_cap_ib_sa(device, i + 1)) - ib_unregister_mad_agent(sa_dev->port[i].agent); - } -free: - kfree(sa_dev); - return; -} - -static void ib_sa_remove_one(struct ib_device *device, void *client_data) -{ - struct ib_sa_device *sa_dev = client_data; - int i; - - if (!sa_dev) - return; - - ib_unregister_event_handler(&sa_dev->event_handler); - - flush_workqueue(ib_wq); - - for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { - if (rdma_cap_ib_sa(device, i + 1)) { - ib_unregister_mad_agent(sa_dev->port[i].agent); - if (sa_dev->port[i].sm_ah) - kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); - } - - } - - kfree(sa_dev); -} - -int ib_sa_init(void) -{ - int ret; - - get_random_bytes(&tid, sizeof tid); - - ret = ib_register_client(&sa_client); - if (ret) { - pr_err("Couldn't register ib_sa client\n"); - goto err1; - } - - ret = mcast_init(); - if (ret) { - pr_err("Couldn't initialize multicast handling\n"); - goto err2; - } - - return 0; - -err2: - ib_unregister_client(&sa_client); -err1: - return ret; -} - -void ib_sa_cleanup(void) -{ - mcast_cleanup(); - ib_unregister_client(&sa_client); - idr_destroy(&query_idr); -} Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/sa_query.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/mad.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/mad.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/mad.c (nonexistent) @@ -1,3339 +0,0 @@ -/* - * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2009 HNR Consulting. All rights reserved. - * Copyright (c) 2014 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#define LINUXKPI_PARAM_PREFIX ibcore_ -#define KBUILD_MODNAME "ibcore" - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include - -#include "mad_priv.h" -#include "mad_rmpp.h" -#include "smi.h" -#include "opa_smi.h" -#include "agent.h" -#include "core_priv.h" - -static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; -static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; - -module_param_named(send_queue_size, mad_sendq_size, int, 0444); -MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests"); -module_param_named(recv_queue_size, mad_recvq_size, int, 0444); -MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); - -static struct list_head ib_mad_port_list; -static u32 ib_mad_client_id = 0; - -/* Port list lock */ -static DEFINE_SPINLOCK(ib_mad_port_list_lock); - -/* Forward declarations */ -static int method_in_use(struct ib_mad_mgmt_method_table **method, - struct ib_mad_reg_req *mad_reg_req); -static void remove_mad_reg_req(struct ib_mad_agent_private *priv); -static struct ib_mad_agent_private *find_mad_agent( - struct ib_mad_port_private *port_priv, - const struct ib_mad_hdr *mad); -static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, - struct ib_mad_private *mad); -static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); -static void timeout_sends(struct work_struct *work); -static void local_completions(struct work_struct *work); -static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, - struct ib_mad_agent_private *agent_priv, - u8 mgmt_class); -static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, - struct ib_mad_agent_private *agent_priv); -static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, - struct ib_wc *wc); -static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc); - -/* - * Returns a ib_mad_port_private structure or NULL for a device/port - * Assumes ib_mad_port_list_lock is being held - */ -static inline struct ib_mad_port_private * -__ib_get_mad_port(struct ib_device *device, int port_num) -{ - struct ib_mad_port_private *entry; - - list_for_each_entry(entry, &ib_mad_port_list, port_list) { - if (entry->device == device && entry->port_num == port_num) - return entry; - } - return NULL; -} - -/* - * Wrapper function to return a ib_mad_port_private structure or NULL - * for a device/port - */ -static inline struct ib_mad_port_private * -ib_get_mad_port(struct ib_device *device, int port_num) -{ - struct ib_mad_port_private *entry; - unsigned long flags; - - spin_lock_irqsave(&ib_mad_port_list_lock, flags); - entry = __ib_get_mad_port(device, port_num); - spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); - - return entry; -} - -static inline u8 convert_mgmt_class(u8 mgmt_class) -{ - /* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */ - return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ? 
- 0 : mgmt_class; -} - -static int get_spl_qp_index(enum ib_qp_type qp_type) -{ - switch (qp_type) - { - case IB_QPT_SMI: - return 0; - case IB_QPT_GSI: - return 1; - default: - return -1; - } -} - -static int vendor_class_index(u8 mgmt_class) -{ - return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START; -} - -static int is_vendor_class(u8 mgmt_class) -{ - if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) || - (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END)) - return 0; - return 1; -} - -static int is_vendor_oui(char *oui) -{ - if (oui[0] || oui[1] || oui[2]) - return 1; - return 0; -} - -static int is_vendor_method_in_use( - struct ib_mad_mgmt_vendor_class *vendor_class, - struct ib_mad_reg_req *mad_reg_req) -{ - struct ib_mad_mgmt_method_table *method; - int i; - - for (i = 0; i < MAX_MGMT_OUI; i++) { - if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) { - method = vendor_class->method_table[i]; - if (method) { - if (method_in_use(&method, mad_reg_req)) - return 1; - else - break; - } - } - } - return 0; -} - -int ib_response_mad(const struct ib_mad_hdr *hdr) -{ - return ((hdr->method & IB_MGMT_METHOD_RESP) || - (hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) || - ((hdr->mgmt_class == IB_MGMT_CLASS_BM) && - (hdr->attr_mod & IB_BM_ATTR_MOD_RESP))); -} -EXPORT_SYMBOL(ib_response_mad); - -/* - * ib_register_mad_agent - Register to send/receive MADs - */ -struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, - u8 port_num, - enum ib_qp_type qp_type, - struct ib_mad_reg_req *mad_reg_req, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context, - u32 registration_flags) -{ - struct ib_mad_port_private *port_priv; - struct ib_mad_agent *ret = ERR_PTR(-EINVAL); - struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_reg_req *reg_req = NULL; - struct ib_mad_mgmt_class_table *class; - struct ib_mad_mgmt_vendor_class_table *vendor; - struct ib_mad_mgmt_vendor_class *vendor_class; - struct ib_mad_mgmt_method_table *method; - int ret2, qpn; - unsigned long flags; - u8 mgmt_class, vclass; - - /* Validate parameters */ - qpn = get_spl_qp_index(qp_type); - if (qpn == -1) { - dev_notice(&device->dev, - "ib_register_mad_agent: invalid QP Type %d\n", - qp_type); - goto error1; - } - - if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) { - dev_notice(&device->dev, - "ib_register_mad_agent: invalid RMPP Version %u\n", - rmpp_version); - goto error1; - } - - /* Validate MAD registration request if supplied */ - if (mad_reg_req) { - if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) { - dev_notice(&device->dev, - "ib_register_mad_agent: invalid Class Version %u\n", - mad_reg_req->mgmt_class_version); - goto error1; - } - if (!recv_handler) { - dev_notice(&device->dev, - "ib_register_mad_agent: no recv_handler\n"); - goto error1; - } - if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { - /* - * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only - * one in this range currently allowed - */ - if (mad_reg_req->mgmt_class != - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n", - mad_reg_req->mgmt_class); - goto error1; - } - } else if (mad_reg_req->mgmt_class == 0) { - /* - * Class 0 is reserved in IBA and is used for - * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE - */ - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid Mgmt Class 0\n"); - goto error1; - } else if (is_vendor_class(mad_reg_req->mgmt_class)) { - /* - * If class is in "new" vendor range, - * 
ensure supplied OUI is not zero - */ - if (!is_vendor_oui(mad_reg_req->oui)) { - dev_notice(&device->dev, - "ib_register_mad_agent: No OUI specified for class 0x%x\n", - mad_reg_req->mgmt_class); - goto error1; - } - } - /* Make sure class supplied is consistent with RMPP */ - if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { - if (rmpp_version) { - dev_notice(&device->dev, - "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n", - mad_reg_req->mgmt_class); - goto error1; - } - } - - /* Make sure class supplied is consistent with QP type */ - if (qp_type == IB_QPT_SMI) { - if ((mad_reg_req->mgmt_class != - IB_MGMT_CLASS_SUBN_LID_ROUTED) && - (mad_reg_req->mgmt_class != - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n", - mad_reg_req->mgmt_class); - goto error1; - } - } else { - if ((mad_reg_req->mgmt_class == - IB_MGMT_CLASS_SUBN_LID_ROUTED) || - (mad_reg_req->mgmt_class == - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n", - mad_reg_req->mgmt_class); - goto error1; - } - } - } else { - /* No registration request supplied */ - if (!send_handler) - goto error1; - if (registration_flags & IB_MAD_USER_RMPP) - goto error1; - } - - /* Validate device and port */ - port_priv = ib_get_mad_port(device, port_num); - if (!port_priv) { - dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n"); - ret = ERR_PTR(-ENODEV); - goto error1; - } - - /* Verify the QP requested is supported. For example, Ethernet devices - * will not have QP0 */ - if (!port_priv->qp_info[qpn].qp) { - dev_notice(&device->dev, - "ib_register_mad_agent: QP %d not supported\n", qpn); - ret = ERR_PTR(-EPROTONOSUPPORT); - goto error1; - } - - /* Allocate structures */ - mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL); - if (!mad_agent_priv) { - ret = ERR_PTR(-ENOMEM); - goto error1; - } - - if (mad_reg_req) { - reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL); - if (!reg_req) { - ret = ERR_PTR(-ENOMEM); - goto error3; - } - } - - /* Now, fill in the various structures */ - mad_agent_priv->qp_info = &port_priv->qp_info[qpn]; - mad_agent_priv->reg_req = reg_req; - mad_agent_priv->agent.rmpp_version = rmpp_version; - mad_agent_priv->agent.device = device; - mad_agent_priv->agent.recv_handler = recv_handler; - mad_agent_priv->agent.send_handler = send_handler; - mad_agent_priv->agent.context = context; - mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp; - mad_agent_priv->agent.port_num = port_num; - mad_agent_priv->agent.flags = registration_flags; - spin_lock_init(&mad_agent_priv->lock); - INIT_LIST_HEAD(&mad_agent_priv->send_list); - INIT_LIST_HEAD(&mad_agent_priv->wait_list); - INIT_LIST_HEAD(&mad_agent_priv->done_list); - INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); - INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); - INIT_LIST_HEAD(&mad_agent_priv->local_list); - INIT_WORK(&mad_agent_priv->local_work, local_completions); - atomic_set(&mad_agent_priv->refcount, 1); - init_completion(&mad_agent_priv->comp); - - spin_lock_irqsave(&port_priv->reg_lock, flags); - mad_agent_priv->agent.hi_tid = ++ib_mad_client_id; - - /* - * Make sure MAD registration (if supplied) - * is non overlapping with any existing ones - */ - if (mad_reg_req) { - mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class); - if (!is_vendor_class(mgmt_class)) { - class = port_priv->version[mad_reg_req-> - mgmt_class_version].class; - if (class) { - 
method = class->method_table[mgmt_class]; - if (method) { - if (method_in_use(&method, - mad_reg_req)) - goto error4; - } - } - ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, - mgmt_class); - } else { - /* "New" vendor class range */ - vendor = port_priv->version[mad_reg_req-> - mgmt_class_version].vendor; - if (vendor) { - vclass = vendor_class_index(mgmt_class); - vendor_class = vendor->vendor_class[vclass]; - if (vendor_class) { - if (is_vendor_method_in_use( - vendor_class, - mad_reg_req)) - goto error4; - } - } - ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); - } - if (ret2) { - ret = ERR_PTR(ret2); - goto error4; - } - } - - /* Add mad agent into port's agent list */ - list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list); - spin_unlock_irqrestore(&port_priv->reg_lock, flags); - - return &mad_agent_priv->agent; - -error4: - spin_unlock_irqrestore(&port_priv->reg_lock, flags); - kfree(reg_req); -error3: - kfree(mad_agent_priv); -error1: - return ret; -} -EXPORT_SYMBOL(ib_register_mad_agent); - -static inline int is_snooping_sends(int mad_snoop_flags) -{ - return (mad_snoop_flags & - (/*IB_MAD_SNOOP_POSTED_SENDS | - IB_MAD_SNOOP_RMPP_SENDS |*/ - IB_MAD_SNOOP_SEND_COMPLETIONS /*| - IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/)); -} - -static inline int is_snooping_recvs(int mad_snoop_flags) -{ - return (mad_snoop_flags & - (IB_MAD_SNOOP_RECVS /*| - IB_MAD_SNOOP_RMPP_RECVS*/)); -} - -static int register_snoop_agent(struct ib_mad_qp_info *qp_info, - struct ib_mad_snoop_private *mad_snoop_priv) -{ - struct ib_mad_snoop_private **new_snoop_table; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - /* Check for empty slot in array. */ - for (i = 0; i < qp_info->snoop_table_size; i++) - if (!qp_info->snoop_table[i]) - break; - - if (i == qp_info->snoop_table_size) { - /* Grow table. 
*/ - new_snoop_table = krealloc(qp_info->snoop_table, - sizeof mad_snoop_priv * - (qp_info->snoop_table_size + 1), - GFP_ATOMIC); - if (!new_snoop_table) { - i = -ENOMEM; - goto out; - } - - qp_info->snoop_table = new_snoop_table; - qp_info->snoop_table_size++; - } - qp_info->snoop_table[i] = mad_snoop_priv; - atomic_inc(&qp_info->snoop_count); -out: - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - return i; -} - -struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, - u8 port_num, - enum ib_qp_type qp_type, - int mad_snoop_flags, - ib_mad_snoop_handler snoop_handler, - ib_mad_recv_handler recv_handler, - void *context) -{ - struct ib_mad_port_private *port_priv; - struct ib_mad_agent *ret; - struct ib_mad_snoop_private *mad_snoop_priv; - int qpn; - - /* Validate parameters */ - if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) || - (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) { - ret = ERR_PTR(-EINVAL); - goto error1; - } - qpn = get_spl_qp_index(qp_type); - if (qpn == -1) { - ret = ERR_PTR(-EINVAL); - goto error1; - } - port_priv = ib_get_mad_port(device, port_num); - if (!port_priv) { - ret = ERR_PTR(-ENODEV); - goto error1; - } - /* Allocate structures */ - mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL); - if (!mad_snoop_priv) { - ret = ERR_PTR(-ENOMEM); - goto error1; - } - - /* Now, fill in the various structures */ - mad_snoop_priv->qp_info = &port_priv->qp_info[qpn]; - mad_snoop_priv->agent.device = device; - mad_snoop_priv->agent.recv_handler = recv_handler; - mad_snoop_priv->agent.snoop_handler = snoop_handler; - mad_snoop_priv->agent.context = context; - mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp; - mad_snoop_priv->agent.port_num = port_num; - mad_snoop_priv->mad_snoop_flags = mad_snoop_flags; - init_completion(&mad_snoop_priv->comp); - mad_snoop_priv->snoop_index = register_snoop_agent( - &port_priv->qp_info[qpn], - mad_snoop_priv); - if (mad_snoop_priv->snoop_index < 0) { - ret = ERR_PTR(mad_snoop_priv->snoop_index); - goto error2; - } - - atomic_set(&mad_snoop_priv->refcount, 1); - return &mad_snoop_priv->agent; - -error2: - kfree(mad_snoop_priv); -error1: - return ret; -} -EXPORT_SYMBOL(ib_register_mad_snoop); - -static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) -{ - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - complete(&mad_agent_priv->comp); -} - -static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv) -{ - if (atomic_dec_and_test(&mad_snoop_priv->refcount)) - complete(&mad_snoop_priv->comp); -} - -static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) -{ - struct ib_mad_port_private *port_priv; - unsigned long flags; - - /* Note that we could still be handling received MADs */ - - /* - * Canceling all sends results in dropping received response - * MADs, preventing us from queuing additional work - */ - cancel_mads(mad_agent_priv); - port_priv = mad_agent_priv->qp_info->port_priv; - cancel_delayed_work(&mad_agent_priv->timed_work); - - spin_lock_irqsave(&port_priv->reg_lock, flags); - remove_mad_reg_req(mad_agent_priv); - list_del(&mad_agent_priv->agent_list); - spin_unlock_irqrestore(&port_priv->reg_lock, flags); - - flush_workqueue(port_priv->wq); - ib_cancel_rmpp_recvs(mad_agent_priv); - - deref_mad_agent(mad_agent_priv); - wait_for_completion(&mad_agent_priv->comp); - - kfree(mad_agent_priv->reg_req); - kfree(mad_agent_priv); -} - -static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) -{ - 
struct ib_mad_qp_info *qp_info; - unsigned long flags; - - qp_info = mad_snoop_priv->qp_info; - spin_lock_irqsave(&qp_info->snoop_lock, flags); - qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL; - atomic_dec(&qp_info->snoop_count); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - - deref_snoop_agent(mad_snoop_priv); - wait_for_completion(&mad_snoop_priv->comp); - - kfree(mad_snoop_priv); -} - -/* - * ib_unregister_mad_agent - Unregisters a client from using MAD services - */ -int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) -{ - struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_snoop_private *mad_snoop_priv; - - /* If the TID is zero, the agent can only snoop. */ - if (mad_agent->hi_tid) { - mad_agent_priv = container_of(mad_agent, - struct ib_mad_agent_private, - agent); - unregister_mad_agent(mad_agent_priv); - } else { - mad_snoop_priv = container_of(mad_agent, - struct ib_mad_snoop_private, - agent); - unregister_mad_snoop(mad_snoop_priv); - } - return 0; -} -EXPORT_SYMBOL(ib_unregister_mad_agent); - -static void dequeue_mad(struct ib_mad_list_head *mad_list) -{ - struct ib_mad_queue *mad_queue; - unsigned long flags; - - BUG_ON(!mad_list->mad_queue); - mad_queue = mad_list->mad_queue; - spin_lock_irqsave(&mad_queue->lock, flags); - list_del(&mad_list->list); - mad_queue->count--; - spin_unlock_irqrestore(&mad_queue->lock, flags); -} - -static void snoop_send(struct ib_mad_qp_info *qp_info, - struct ib_mad_send_buf *send_buf, - struct ib_mad_send_wc *mad_send_wc, - int mad_snoop_flags) -{ - struct ib_mad_snoop_private *mad_snoop_priv; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - for (i = 0; i < qp_info->snoop_table_size; i++) { - mad_snoop_priv = qp_info->snoop_table[i]; - if (!mad_snoop_priv || - !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) - continue; - - atomic_inc(&mad_snoop_priv->refcount); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, - send_buf, mad_send_wc); - deref_snoop_agent(mad_snoop_priv); - spin_lock_irqsave(&qp_info->snoop_lock, flags); - } - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); -} - -static void snoop_recv(struct ib_mad_qp_info *qp_info, - struct ib_mad_recv_wc *mad_recv_wc, - int mad_snoop_flags) -{ - struct ib_mad_snoop_private *mad_snoop_priv; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - for (i = 0; i < qp_info->snoop_table_size; i++) { - mad_snoop_priv = qp_info->snoop_table[i]; - if (!mad_snoop_priv || - !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) - continue; - - atomic_inc(&mad_snoop_priv->refcount); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL, - mad_recv_wc); - deref_snoop_agent(mad_snoop_priv); - spin_lock_irqsave(&qp_info->snoop_lock, flags); - } - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); -} - -static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid, - u16 pkey_index, u8 port_num, struct ib_wc *wc) -{ - memset(wc, 0, sizeof *wc); - wc->wr_cqe = cqe; - wc->status = IB_WC_SUCCESS; - wc->opcode = IB_WC_RECV; - wc->pkey_index = pkey_index; - wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh); - wc->src_qp = IB_QP0; - wc->qp = qp; - wc->slid = slid; - wc->sl = 0; - wc->dlid_path_bits = 0; - wc->port_num = port_num; -} - -static size_t mad_priv_size(const struct ib_mad_private *mp) -{ - return sizeof(struct ib_mad_private) + 
mp->mad_size; -} - -static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags) -{ - size_t size = sizeof(struct ib_mad_private) + mad_size; - struct ib_mad_private *ret = kzalloc(size, flags); - - if (ret) - ret->mad_size = mad_size; - - return ret; -} - -static size_t port_mad_size(const struct ib_mad_port_private *port_priv) -{ - return rdma_max_mad_size(port_priv->device, port_priv->port_num); -} - -static size_t mad_priv_dma_size(const struct ib_mad_private *mp) -{ - return sizeof(struct ib_grh) + mp->mad_size; -} - -/* - * Return 0 if SMP is to be sent - * Return 1 if SMP was consumed locally (whether or not solicited) - * Return < 0 if error - */ -static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_send_wr_private *mad_send_wr) -{ - int ret = 0; - struct ib_smp *smp = mad_send_wr->send_buf.mad; - struct opa_smp *opa_smp = (struct opa_smp *)smp; - unsigned long flags; - struct ib_mad_local_private *local; - struct ib_mad_private *mad_priv; - struct ib_mad_port_private *port_priv; - struct ib_mad_agent_private *recv_mad_agent = NULL; - struct ib_device *device = mad_agent_priv->agent.device; - u8 port_num; - struct ib_wc mad_wc; - struct ib_ud_wr *send_wr = &mad_send_wr->send_wr; - size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); - u16 out_mad_pkey_index = 0; - u16 drslid; - bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, - mad_agent_priv->qp_info->port_priv->port_num); - - if (rdma_cap_ib_switch(device) && - smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - port_num = send_wr->port_num; - else - port_num = mad_agent_priv->agent.port_num; - - /* - * Directed route handling starts if the initial LID routed part of - * a request or the ending LID routed part of a response is empty. - * If we are at the start of the LID routed part, don't update the - * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec. - */ - if (opa && smp->class_version == OPA_SMP_CLASS_VERSION) { - u32 opa_drslid; - - if ((opa_get_smp_direction(opa_smp) - ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == - OPA_LID_PERMISSIVE && - opa_smi_handle_dr_smp_send(opa_smp, - rdma_cap_ib_switch(device), - port_num) == IB_SMI_DISCARD) { - ret = -EINVAL; - dev_err(&device->dev, "OPA Invalid directed route\n"); - goto out; - } - opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid); - if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) && - opa_drslid & 0xffff0000) { - ret = -EINVAL; - dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n", - opa_drslid); - goto out; - } - drslid = (u16)(opa_drslid & 0x0000ffff); - - /* Check to post send on QP or process locally */ - if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD && - opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD) - goto out; - } else { - if ((ib_get_smp_direction(smp) ? 
smp->dr_dlid : smp->dr_slid) == - IB_LID_PERMISSIVE && - smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) == - IB_SMI_DISCARD) { - ret = -EINVAL; - dev_err(&device->dev, "Invalid directed route\n"); - goto out; - } - drslid = be16_to_cpu(smp->dr_slid); - - /* Check to post send on QP or process locally */ - if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && - smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) - goto out; - } - - local = kmalloc(sizeof *local, GFP_ATOMIC); - if (!local) { - ret = -ENOMEM; - dev_err(&device->dev, "No memory for ib_mad_local_private\n"); - goto out; - } - local->mad_priv = NULL; - local->recv_mad_agent = NULL; - mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC); - if (!mad_priv) { - ret = -ENOMEM; - dev_err(&device->dev, "No memory for local response MAD\n"); - kfree(local); - goto out; - } - - build_smp_wc(mad_agent_priv->agent.qp, - send_wr->wr.wr_cqe, drslid, - send_wr->pkey_index, - send_wr->port_num, &mad_wc); - - if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) { - mad_wc.byte_len = mad_send_wr->send_buf.hdr_len - + mad_send_wr->send_buf.data_len - + sizeof(struct ib_grh); - } - - /* No GRH for DR SMP */ - ret = device->process_mad(device, 0, port_num, &mad_wc, NULL, - (const struct ib_mad_hdr *)smp, mad_size, - (struct ib_mad_hdr *)mad_priv->mad, - &mad_size, &out_mad_pkey_index); - switch (ret) - { - case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: - if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) && - mad_agent_priv->agent.recv_handler) { - local->mad_priv = mad_priv; - local->recv_mad_agent = mad_agent_priv; - /* - * Reference MAD agent until receive - * side of local completion handled - */ - atomic_inc(&mad_agent_priv->refcount); - } else - kfree(mad_priv); - break; - case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED: - kfree(mad_priv); - break; - case IB_MAD_RESULT_SUCCESS: - /* Treat like an incoming receive MAD */ - port_priv = ib_get_mad_port(mad_agent_priv->agent.device, - mad_agent_priv->agent.port_num); - if (port_priv) { - memcpy(mad_priv->mad, smp, mad_priv->mad_size); - recv_mad_agent = find_mad_agent(port_priv, - (const struct ib_mad_hdr *)mad_priv->mad); - } - if (!port_priv || !recv_mad_agent) { - /* - * No receiving agent so drop packet and - * generate send completion. - */ - kfree(mad_priv); - break; - } - local->mad_priv = mad_priv; - local->recv_mad_agent = recv_mad_agent; - break; - default: - kfree(mad_priv); - kfree(local); - ret = -EINVAL; - goto out; - } - - local->mad_send_wr = mad_send_wr; - if (opa) { - local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index; - local->return_wc_byte_len = mad_size; - } - /* Reference MAD agent until send side of local completion handled */ - atomic_inc(&mad_agent_priv->refcount); - /* Queue local completion to local list */ - spin_lock_irqsave(&mad_agent_priv->lock, flags); - list_add_tail(&local->completion_list, &mad_agent_priv->local_list); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - queue_work(mad_agent_priv->qp_info->port_priv->wq, - &mad_agent_priv->local_work); - ret = 1; -out: - return ret; -} - -static int get_pad_size(int hdr_len, int data_len, size_t mad_size) -{ - int seg_size, pad; - - seg_size = mad_size - hdr_len; - if (data_len && seg_size) { - pad = seg_size - data_len % seg_size; - return pad == seg_size ? 
0 : pad; - } else - return seg_size; -} - -static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr) -{ - struct ib_rmpp_segment *s, *t; - - list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) { - list_del(&s->list); - kfree(s); - } -} - -static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, - size_t mad_size, gfp_t gfp_mask) -{ - struct ib_mad_send_buf *send_buf = &send_wr->send_buf; - struct ib_rmpp_mad *rmpp_mad = send_buf->mad; - struct ib_rmpp_segment *seg = NULL; - int left, seg_size, pad; - - send_buf->seg_size = mad_size - send_buf->hdr_len; - send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR; - seg_size = send_buf->seg_size; - pad = send_wr->pad; - - /* Allocate data segments. */ - for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { - seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask); - if (!seg) { - dev_err(&send_buf->mad_agent->device->dev, - "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n", - sizeof (*seg) + seg_size, gfp_mask); - free_send_rmpp_list(send_wr); - return -ENOMEM; - } - seg->num = ++send_buf->seg_count; - list_add_tail(&seg->list, &send_wr->rmpp_list); - } - - /* Zero any padding */ - if (pad) - memset(seg->data + seg_size - pad, 0, pad); - - rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv-> - agent.rmpp_version; - rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; - ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); - - send_wr->cur_seg = container_of(send_wr->rmpp_list.next, - struct ib_rmpp_segment, list); - send_wr->last_ack_seg = send_wr->cur_seg; - return 0; -} - -int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent) -{ - return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP); -} -EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent); - -struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, - u32 remote_qpn, u16 pkey_index, - int rmpp_active, - int hdr_len, int data_len, - gfp_t gfp_mask, - u8 base_version) -{ - struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_wr_private *mad_send_wr; - int pad, message_size, ret, size; - void *buf; - size_t mad_size; - bool opa; - - mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, - agent); - - opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num); - - if (opa && base_version == OPA_MGMT_BASE_VERSION) - mad_size = sizeof(struct opa_mad); - else - mad_size = sizeof(struct ib_mad); - - pad = get_pad_size(hdr_len, data_len, mad_size); - message_size = hdr_len + data_len + pad; - - if (ib_mad_kernel_rmpp_agent(mad_agent)) { - if (!rmpp_active && message_size > mad_size) - return ERR_PTR(-EINVAL); - } else - if (rmpp_active || message_size > mad_size) - return ERR_PTR(-EINVAL); - - size = rmpp_active ? 
hdr_len : mad_size; - buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); - if (!buf) - return ERR_PTR(-ENOMEM); - - mad_send_wr = (struct ib_mad_send_wr_private *)((char *)buf + size); - INIT_LIST_HEAD(&mad_send_wr->rmpp_list); - mad_send_wr->send_buf.mad = buf; - mad_send_wr->send_buf.hdr_len = hdr_len; - mad_send_wr->send_buf.data_len = data_len; - mad_send_wr->pad = pad; - - mad_send_wr->mad_agent_priv = mad_agent_priv; - mad_send_wr->sg_list[0].length = hdr_len; - mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey; - - /* OPA MADs don't have to be the full 2048 bytes */ - if (opa && base_version == OPA_MGMT_BASE_VERSION && - data_len < mad_size - hdr_len) - mad_send_wr->sg_list[1].length = data_len; - else - mad_send_wr->sg_list[1].length = mad_size - hdr_len; - - mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey; - - mad_send_wr->mad_list.cqe.done = ib_mad_send_done; - - mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe; - mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list; - mad_send_wr->send_wr.wr.num_sge = 2; - mad_send_wr->send_wr.wr.opcode = IB_WR_SEND; - mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED; - mad_send_wr->send_wr.remote_qpn = remote_qpn; - mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY; - mad_send_wr->send_wr.pkey_index = pkey_index; - - if (rmpp_active) { - ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); - if (ret) { - kfree(buf); - return ERR_PTR(ret); - } - } - - mad_send_wr->send_buf.mad_agent = mad_agent; - atomic_inc(&mad_agent_priv->refcount); - return &mad_send_wr->send_buf; -} -EXPORT_SYMBOL(ib_create_send_mad); - -int ib_get_mad_data_offset(u8 mgmt_class) -{ - if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) - return IB_MGMT_SA_HDR; - else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || - (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || - (mgmt_class == IB_MGMT_CLASS_BIS)) - return IB_MGMT_DEVICE_HDR; - else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && - (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) - return IB_MGMT_VENDOR_HDR; - else - return IB_MGMT_MAD_HDR; -} -EXPORT_SYMBOL(ib_get_mad_data_offset); - -int ib_is_mad_class_rmpp(u8 mgmt_class) -{ - if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) || - (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || - (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || - (mgmt_class == IB_MGMT_CLASS_BIS) || - ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && - (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))) - return 1; - return 0; -} -EXPORT_SYMBOL(ib_is_mad_class_rmpp); - -void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num) -{ - struct ib_mad_send_wr_private *mad_send_wr; - struct list_head *list; - - mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, - send_buf); - list = &mad_send_wr->cur_seg->list; - - if (mad_send_wr->cur_seg->num < seg_num) { - list_for_each_entry(mad_send_wr->cur_seg, list, list) - if (mad_send_wr->cur_seg->num == seg_num) - break; - } else if (mad_send_wr->cur_seg->num > seg_num) { - list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list) - if (mad_send_wr->cur_seg->num == seg_num) - break; - } - return mad_send_wr->cur_seg->data; -} -EXPORT_SYMBOL(ib_get_rmpp_segment); - -static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr) -{ - if (mad_send_wr->send_buf.seg_count) - return ib_get_rmpp_segment(&mad_send_wr->send_buf, - mad_send_wr->seg_num); - else - return (char *)mad_send_wr->send_buf.mad + - mad_send_wr->send_buf.hdr_len; -} - -void ib_free_send_mad(struct 
ib_mad_send_buf *send_buf) -{ - struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_wr_private *mad_send_wr; - - mad_agent_priv = container_of(send_buf->mad_agent, - struct ib_mad_agent_private, agent); - mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, - send_buf); - - free_send_rmpp_list(mad_send_wr); - kfree(send_buf->mad); - deref_mad_agent(mad_agent_priv); -} -EXPORT_SYMBOL(ib_free_send_mad); - -int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) -{ - struct ib_mad_qp_info *qp_info; - struct list_head *list; - struct ib_send_wr *bad_send_wr; - struct ib_mad_agent *mad_agent; - struct ib_sge *sge; - unsigned long flags; - int ret; - - /* Set WR ID to find mad_send_wr upon completion */ - qp_info = mad_send_wr->mad_agent_priv->qp_info; - mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; - mad_send_wr->mad_list.cqe.done = ib_mad_send_done; - mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe; - - mad_agent = mad_send_wr->send_buf.mad_agent; - sge = mad_send_wr->sg_list; - sge[0].addr = ib_dma_map_single(mad_agent->device, - mad_send_wr->send_buf.mad, - sge[0].length, - DMA_TO_DEVICE); - if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr))) - return -ENOMEM; - - mad_send_wr->header_mapping = sge[0].addr; - - sge[1].addr = ib_dma_map_single(mad_agent->device, - ib_get_payload(mad_send_wr), - sge[1].length, - DMA_TO_DEVICE); - if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) { - ib_dma_unmap_single(mad_agent->device, - mad_send_wr->header_mapping, - sge[0].length, DMA_TO_DEVICE); - return -ENOMEM; - } - mad_send_wr->payload_mapping = sge[1].addr; - - spin_lock_irqsave(&qp_info->send_queue.lock, flags); - if (qp_info->send_queue.count < qp_info->send_queue.max_active) { - ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, - &bad_send_wr); - list = &qp_info->send_queue.list; - } else { - ret = 0; - list = &qp_info->overflow_list; - } - - if (!ret) { - qp_info->send_queue.count++; - list_add_tail(&mad_send_wr->mad_list.list, list); - } - spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); - if (ret) { - ib_dma_unmap_single(mad_agent->device, - mad_send_wr->header_mapping, - sge[0].length, DMA_TO_DEVICE); - ib_dma_unmap_single(mad_agent->device, - mad_send_wr->payload_mapping, - sge[1].length, DMA_TO_DEVICE); - } - return ret; -} - -/* - * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated - * with the registered client - */ -int ib_post_send_mad(struct ib_mad_send_buf *send_buf, - struct ib_mad_send_buf **bad_send_buf) -{ - struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_buf *next_send_buf; - struct ib_mad_send_wr_private *mad_send_wr; - unsigned long flags; - int ret = -EINVAL; - - /* Walk list of send WRs and post each on send list */ - for (; send_buf; send_buf = next_send_buf) { - - mad_send_wr = container_of(send_buf, - struct ib_mad_send_wr_private, - send_buf); - mad_agent_priv = mad_send_wr->mad_agent_priv; - - if (!send_buf->mad_agent->send_handler || - (send_buf->timeout_ms && - !send_buf->mad_agent->recv_handler)) { - ret = -EINVAL; - goto error; - } - - if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) { - if (mad_agent_priv->agent.rmpp_version) { - ret = -EINVAL; - goto error; - } - } - - /* - * Save pointer to next work request to post in case the - * current one completes, and the user modifies the work - * request associated with the completion - */ - next_send_buf = send_buf->next; - mad_send_wr->send_wr.ah = 
send_buf->ah; - - if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - ret = handle_outgoing_dr_smp(mad_agent_priv, - mad_send_wr); - if (ret < 0) /* error */ - goto error; - else if (ret == 1) /* locally consumed */ - continue; - } - - mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; - /* Timeout will be updated after send completes */ - mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); - mad_send_wr->max_retries = send_buf->retries; - mad_send_wr->retries_left = send_buf->retries; - send_buf->retries = 0; - /* Reference for work request to QP + response */ - mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); - mad_send_wr->status = IB_WC_SUCCESS; - - /* Reference MAD agent until send completes */ - atomic_inc(&mad_agent_priv->refcount); - spin_lock_irqsave(&mad_agent_priv->lock, flags); - list_add_tail(&mad_send_wr->agent_list, - &mad_agent_priv->send_list); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - - if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { - ret = ib_send_rmpp_mad(mad_send_wr); - if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED) - ret = ib_send_mad(mad_send_wr); - } else - ret = ib_send_mad(mad_send_wr); - if (ret < 0) { - /* Fail send request */ - spin_lock_irqsave(&mad_agent_priv->lock, flags); - list_del(&mad_send_wr->agent_list); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - atomic_dec(&mad_agent_priv->refcount); - goto error; - } - } - return 0; -error: - if (bad_send_buf) - *bad_send_buf = send_buf; - return ret; -} -EXPORT_SYMBOL(ib_post_send_mad); - -/* - * ib_free_recv_mad - Returns data buffers used to receive - * a MAD to the access layer - */ -void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf; - struct ib_mad_private_header *mad_priv_hdr; - struct ib_mad_private *priv; - struct list_head free_list; - - INIT_LIST_HEAD(&free_list); - list_splice_init(&mad_recv_wc->rmpp_list, &free_list); - - list_for_each_entry_safe(mad_recv_buf, temp_recv_buf, - &free_list, list) { - mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc, - recv_buf); - mad_priv_hdr = container_of(mad_recv_wc, - struct ib_mad_private_header, - recv_wc); - priv = container_of(mad_priv_hdr, struct ib_mad_private, - header); - kfree(priv); - } -} -EXPORT_SYMBOL(ib_free_recv_mad); - -struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context) -{ - return ERR_PTR(-EINVAL); /* XXX: for now */ -} -EXPORT_SYMBOL(ib_redirect_mad_qp); - -int ib_process_mad_wc(struct ib_mad_agent *mad_agent, - struct ib_wc *wc) -{ - dev_err(&mad_agent->device->dev, - "ib_process_mad_wc() not implemented yet\n"); - return 0; -} -EXPORT_SYMBOL(ib_process_mad_wc); - -static int method_in_use(struct ib_mad_mgmt_method_table **method, - struct ib_mad_reg_req *mad_reg_req) -{ - int i; - - for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) { - if ((*method)->agent[i]) { - pr_err("Method %d already in use\n", i); - return -EINVAL; - } - } - return 0; -} - -static int allocate_method_table(struct ib_mad_mgmt_method_table **method) -{ - /* Allocate management method table */ - *method = kzalloc(sizeof **method, GFP_ATOMIC); - if (!*method) { - pr_err("No memory for ib_mad_mgmt_method_table\n"); - return -ENOMEM; - } - - return 0; -} - -/* - * Check to see if there are any methods still in use - */ -static int check_method_table(struct 
ib_mad_mgmt_method_table *method) -{ - int i; - - for (i = 0; i < IB_MGMT_MAX_METHODS; i++) - if (method->agent[i]) - return 1; - return 0; -} - -/* - * Check to see if there are any method tables for this class still in use - */ -static int check_class_table(struct ib_mad_mgmt_class_table *class) -{ - int i; - - for (i = 0; i < MAX_MGMT_CLASS; i++) - if (class->method_table[i]) - return 1; - return 0; -} - -static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class) -{ - int i; - - for (i = 0; i < MAX_MGMT_OUI; i++) - if (vendor_class->method_table[i]) - return 1; - return 0; -} - -static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, - const char *oui) -{ - int i; - - for (i = 0; i < MAX_MGMT_OUI; i++) - /* Is there matching OUI for this vendor class ? */ - if (!memcmp(vendor_class->oui[i], oui, 3)) - return i; - - return -1; -} - -static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor) -{ - int i; - - for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++) - if (vendor->vendor_class[i]) - return 1; - - return 0; -} - -static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method, - struct ib_mad_agent_private *agent) -{ - int i; - - /* Remove any methods for this mad agent */ - for (i = 0; i < IB_MGMT_MAX_METHODS; i++) { - if (method->agent[i] == agent) { - method->agent[i] = NULL; - } - } -} - -static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, - struct ib_mad_agent_private *agent_priv, - u8 mgmt_class) -{ - struct ib_mad_port_private *port_priv; - struct ib_mad_mgmt_class_table **class; - struct ib_mad_mgmt_method_table **method; - int i, ret; - - port_priv = agent_priv->qp_info->port_priv; - class = &port_priv->version[mad_reg_req->mgmt_class_version].class; - if (!*class) { - /* Allocate management class table for "new" class version */ - *class = kzalloc(sizeof **class, GFP_ATOMIC); - if (!*class) { - dev_err(&agent_priv->agent.device->dev, - "No memory for ib_mad_mgmt_class_table\n"); - ret = -ENOMEM; - goto error1; - } - - /* Allocate method table for this management class */ - method = &(*class)->method_table[mgmt_class]; - if ((ret = allocate_method_table(method))) - goto error2; - } else { - method = &(*class)->method_table[mgmt_class]; - if (!*method) { - /* Allocate method table for this management class */ - if ((ret = allocate_method_table(method))) - goto error1; - } - } - - /* Now, make sure methods are not already in use */ - if (method_in_use(method, mad_reg_req)) - goto error3; - - /* Finally, add in methods being registered */ - for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) - (*method)->agent[i] = agent_priv; - - return 0; - -error3: - /* Remove any methods for this mad agent */ - remove_methods_mad_agent(*method, agent_priv); - /* Now, check to see if there are any methods in use */ - if (!check_method_table(*method)) { - /* If not, release management method table */ - kfree(*method); - *method = NULL; - } - ret = -EINVAL; - goto error1; -error2: - kfree(*class); - *class = NULL; -error1: - return ret; -} - -static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, - struct ib_mad_agent_private *agent_priv) -{ - struct ib_mad_port_private *port_priv; - struct ib_mad_mgmt_vendor_class_table **vendor_table; - struct ib_mad_mgmt_vendor_class_table *vendor = NULL; - struct ib_mad_mgmt_vendor_class *vendor_class = NULL; - struct ib_mad_mgmt_method_table **method; - int i, ret = -ENOMEM; - u8 vclass; - - /* "New" vendor (with OUI) class */ - vclass = 
vendor_class_index(mad_reg_req->mgmt_class); - port_priv = agent_priv->qp_info->port_priv; - vendor_table = &port_priv->version[ - mad_reg_req->mgmt_class_version].vendor; - if (!*vendor_table) { - /* Allocate mgmt vendor class table for "new" class version */ - vendor = kzalloc(sizeof *vendor, GFP_ATOMIC); - if (!vendor) { - dev_err(&agent_priv->agent.device->dev, - "No memory for ib_mad_mgmt_vendor_class_table\n"); - goto error1; - } - - *vendor_table = vendor; - } - if (!(*vendor_table)->vendor_class[vclass]) { - /* Allocate table for this management vendor class */ - vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC); - if (!vendor_class) { - dev_err(&agent_priv->agent.device->dev, - "No memory for ib_mad_mgmt_vendor_class\n"); - goto error2; - } - - (*vendor_table)->vendor_class[vclass] = vendor_class; - } - for (i = 0; i < MAX_MGMT_OUI; i++) { - /* Is there matching OUI for this vendor class ? */ - if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i], - mad_reg_req->oui, 3)) { - method = &(*vendor_table)->vendor_class[ - vclass]->method_table[i]; - BUG_ON(!*method); - goto check_in_use; - } - } - for (i = 0; i < MAX_MGMT_OUI; i++) { - /* OUI slot available ? */ - if (!is_vendor_oui((*vendor_table)->vendor_class[ - vclass]->oui[i])) { - method = &(*vendor_table)->vendor_class[ - vclass]->method_table[i]; - BUG_ON(*method); - /* Allocate method table for this OUI */ - if ((ret = allocate_method_table(method))) - goto error3; - memcpy((*vendor_table)->vendor_class[vclass]->oui[i], - mad_reg_req->oui, 3); - goto check_in_use; - } - } - dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n"); - goto error3; - -check_in_use: - /* Now, make sure methods are not already in use */ - if (method_in_use(method, mad_reg_req)) - goto error4; - - /* Finally, add in methods being registered */ - for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) - (*method)->agent[i] = agent_priv; - - return 0; - -error4: - /* Remove any methods for this mad agent */ - remove_methods_mad_agent(*method, agent_priv); - /* Now, check to see if there are any methods in use */ - if (!check_method_table(*method)) { - /* If not, release management method table */ - kfree(*method); - *method = NULL; - } - ret = -EINVAL; -error3: - if (vendor_class) { - (*vendor_table)->vendor_class[vclass] = NULL; - kfree(vendor_class); - } -error2: - if (vendor) { - *vendor_table = NULL; - kfree(vendor); - } -error1: - return ret; -} - -static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv) -{ - struct ib_mad_port_private *port_priv; - struct ib_mad_mgmt_class_table *class; - struct ib_mad_mgmt_method_table *method; - struct ib_mad_mgmt_vendor_class_table *vendor; - struct ib_mad_mgmt_vendor_class *vendor_class; - int index; - u8 mgmt_class; - - /* - * Was MAD registration request supplied - * with original registration ? 
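(For reference, a registration that exercises these tables might look
like the following sketch; it is not from this file, the class and
methods are illustrative, and my_send_handler/my_recv_handler/
my_context are assumed to be supplied by the caller:

	struct ib_mad_reg_req reg_req;
	struct ib_mad_agent *agent;

	memset(&reg_req, 0, sizeof(reg_req));
	reg_req.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
	reg_req.mgmt_class_version = 1;
	set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
	set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
	agent = ib_register_mad_agent(device, port_num, IB_QPT_GSI,
				      &reg_req, 0, my_send_handler,
				      my_recv_handler, my_context, 0);

Unregistering such an agent is what leads here, to
remove_mad_reg_req().)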
- */ - if (!agent_priv->reg_req) { - goto out; - } - - port_priv = agent_priv->qp_info->port_priv; - mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class); - class = port_priv->version[ - agent_priv->reg_req->mgmt_class_version].class; - if (!class) - goto vendor_check; - - method = class->method_table[mgmt_class]; - if (method) { - /* Remove any methods for this mad agent */ - remove_methods_mad_agent(method, agent_priv); - /* Now, check to see if there are any methods still in use */ - if (!check_method_table(method)) { - /* If not, release management method table */ - kfree(method); - class->method_table[mgmt_class] = NULL; - /* Any management classes left ? */ - if (!check_class_table(class)) { - /* If not, release management class table */ - kfree(class); - port_priv->version[ - agent_priv->reg_req-> - mgmt_class_version].class = NULL; - } - } - } - -vendor_check: - if (!is_vendor_class(mgmt_class)) - goto out; - - /* normalize mgmt_class to vendor range 2 */ - mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class); - vendor = port_priv->version[ - agent_priv->reg_req->mgmt_class_version].vendor; - - if (!vendor) - goto out; - - vendor_class = vendor->vendor_class[mgmt_class]; - if (vendor_class) { - index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui); - if (index < 0) - goto out; - method = vendor_class->method_table[index]; - if (method) { - /* Remove any methods for this mad agent */ - remove_methods_mad_agent(method, agent_priv); - /* - * Now, check to see if there are - * any methods still in use - */ - if (!check_method_table(method)) { - /* If not, release management method table */ - kfree(method); - vendor_class->method_table[index] = NULL; - memset(vendor_class->oui[index], 0, 3); - /* Any OUIs left ? */ - if (!check_vendor_class(vendor_class)) { - /* If not, release vendor class table */ - kfree(vendor_class); - vendor->vendor_class[mgmt_class] = NULL; - /* Any other vendor classes left ? */ - if (!check_vendor_table(vendor)) { - kfree(vendor); - port_priv->version[ - agent_priv->reg_req-> - mgmt_class_version]. - vendor = NULL; - } - } - } - } - } - -out: - return; -} - -static struct ib_mad_agent_private * -find_mad_agent(struct ib_mad_port_private *port_priv, - const struct ib_mad_hdr *mad_hdr) -{ - struct ib_mad_agent_private *mad_agent = NULL; - unsigned long flags; - - spin_lock_irqsave(&port_priv->reg_lock, flags); - if (ib_response_mad(mad_hdr)) { - u32 hi_tid; - struct ib_mad_agent_private *entry; - - /* - * Routing is based on high 32 bits of transaction ID - * of MAD. 
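(Each agent was handed a unique hi_tid when it registered, and a
well-behaved requester stamps it into the top half of every TID it
sends, along these lines -- request_id being whatever value the
client uses to tell its own requests apart:

	mad_hdr->tid = cpu_to_be64(((u64)mad_agent->hi_tid << 32) |
				   request_id);

which is what makes the 32-bit comparison below sufficient.)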
- */ - hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; - list_for_each_entry(entry, &port_priv->agent_list, agent_list) { - if (entry->agent.hi_tid == hi_tid) { - mad_agent = entry; - break; - } - } - } else { - struct ib_mad_mgmt_class_table *class; - struct ib_mad_mgmt_method_table *method; - struct ib_mad_mgmt_vendor_class_table *vendor; - struct ib_mad_mgmt_vendor_class *vendor_class; - const struct ib_vendor_mad *vendor_mad; - int index; - - /* - * Routing is based on version, class, and method - * For "newer" vendor MADs, also based on OUI - */ - if (mad_hdr->class_version >= MAX_MGMT_VERSION) - goto out; - if (!is_vendor_class(mad_hdr->mgmt_class)) { - class = port_priv->version[ - mad_hdr->class_version].class; - if (!class) - goto out; - if (convert_mgmt_class(mad_hdr->mgmt_class) >= - IB_MGMT_MAX_METHODS) - goto out; - method = class->method_table[convert_mgmt_class( - mad_hdr->mgmt_class)]; - if (method) - mad_agent = method->agent[mad_hdr->method & - ~IB_MGMT_METHOD_RESP]; - } else { - vendor = port_priv->version[ - mad_hdr->class_version].vendor; - if (!vendor) - goto out; - vendor_class = vendor->vendor_class[vendor_class_index( - mad_hdr->mgmt_class)]; - if (!vendor_class) - goto out; - /* Find matching OUI */ - vendor_mad = (const struct ib_vendor_mad *)mad_hdr; - index = find_vendor_oui(vendor_class, vendor_mad->oui); - if (index == -1) - goto out; - method = vendor_class->method_table[index]; - if (method) { - mad_agent = method->agent[mad_hdr->method & - ~IB_MGMT_METHOD_RESP]; - } - } - } - - if (mad_agent) { - if (mad_agent->agent.recv_handler) - atomic_inc(&mad_agent->refcount); - else { - dev_notice(&port_priv->device->dev, - "No receive handler for client %p on port %d\n", - &mad_agent->agent, port_priv->port_num); - mad_agent = NULL; - } - } -out: - spin_unlock_irqrestore(&port_priv->reg_lock, flags); - - return mad_agent; -} - -static int validate_mad(const struct ib_mad_hdr *mad_hdr, - const struct ib_mad_qp_info *qp_info, - bool opa) -{ - int valid = 0; - u32 qp_num = qp_info->qp->qp_num; - - /* Make sure MAD base version is understood */ - if (mad_hdr->base_version != IB_MGMT_BASE_VERSION && - (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) { - pr_err("MAD received with unsupported base version %d %s\n", - mad_hdr->base_version, opa ? 
"(opa)" : ""); - goto out; - } - - /* Filter SMI packets sent to other than QP0 */ - if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || - (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { - if (qp_num == 0) - valid = 1; - } else { - /* CM attributes other than ClassPortInfo only use Send method */ - if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) && - (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) && - (mad_hdr->method != IB_MGMT_METHOD_SEND)) - goto out; - /* Filter GSI packets sent to QP0 */ - if (qp_num != 0) - valid = 1; - } - -out: - return valid; -} - -static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv, - const struct ib_mad_hdr *mad_hdr) -{ - const struct ib_rmpp_mad *rmpp_mad; - - rmpp_mad = (const struct ib_rmpp_mad *)mad_hdr; - return !mad_agent_priv->agent.rmpp_version || - !ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) || - !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & - IB_MGMT_RMPP_FLAG_ACTIVE) || - (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); -} - -static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr, - const struct ib_mad_recv_wc *rwc) -{ - return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class == - rwc->recv_buf.mad->mad_hdr.mgmt_class; -} - -static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv, - const struct ib_mad_send_wr_private *wr, - const struct ib_mad_recv_wc *rwc ) -{ - struct ib_ah_attr attr; - u8 send_resp, rcv_resp; - union ib_gid sgid; - struct ib_device *device = mad_agent_priv->agent.device; - u8 port_num = mad_agent_priv->agent.port_num; - u8 lmc; - - send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad); - rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr); - - if (send_resp == rcv_resp) - /* both requests, or both responses. GIDs different */ - return 0; - - if (ib_query_ah(wr->send_buf.ah, &attr)) - /* Assume not equal, to avoid false positives. */ - return 0; - - if (!!(attr.ah_flags & IB_AH_GRH) != - !!(rwc->wc->wc_flags & IB_WC_GRH)) - /* one has GID, other does not. Assume different */ - return 0; - - if (!send_resp && rcv_resp) { - /* is request/response. */ - if (!(attr.ah_flags & IB_AH_GRH)) { - if (ib_get_cached_lmc(device, port_num, &lmc)) - return 0; - return (!lmc || !((attr.src_path_bits ^ - rwc->wc->dlid_path_bits) & - ((1 << lmc) - 1))); - } else { - if (ib_get_cached_gid(device, port_num, - attr.grh.sgid_index, &sgid, NULL)) - return 0; - return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, - 16); - } - } - - if (!(attr.ah_flags & IB_AH_GRH)) - return attr.dlid == rwc->wc->slid; - else - return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw, - 16); -} - -static inline int is_direct(u8 class) -{ - return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE); -} - -struct ib_mad_send_wr_private* -ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, - const struct ib_mad_recv_wc *wc) -{ - struct ib_mad_send_wr_private *wr; - const struct ib_mad_hdr *mad_hdr; - - mad_hdr = &wc->recv_buf.mad->mad_hdr; - - list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) { - if ((wr->tid == mad_hdr->tid) && - rcv_has_same_class(wr, wc) && - /* - * Don't check GID for direct routed MADs. - * These might have permissive LIDs. - */ - (is_direct(mad_hdr->mgmt_class) || - rcv_has_same_gid(mad_agent_priv, wr, wc))) - return (wr->status == IB_WC_SUCCESS) ? 
wr : NULL; - } - - /* - * It's possible to receive the response before we've - * been notified that the send has completed - */ - list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) { - if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) && - wr->tid == mad_hdr->tid && - wr->timeout && - rcv_has_same_class(wr, wc) && - /* - * Don't check GID for direct routed MADs. - * These might have permissive LIDs. - */ - (is_direct(mad_hdr->mgmt_class) || - rcv_has_same_gid(mad_agent_priv, wr, wc))) - /* Verify request has not been canceled */ - return (wr->status == IB_WC_SUCCESS) ? wr : NULL; - } - return NULL; -} - -void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr) -{ - mad_send_wr->timeout = 0; - if (mad_send_wr->refcount == 1) - list_move_tail(&mad_send_wr->agent_list, - &mad_send_wr->mad_agent_priv->done_list); -} - -static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_mad_send_wr_private *mad_send_wr; - struct ib_mad_send_wc mad_send_wc; - unsigned long flags; - - INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); - list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); - if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { - mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, - mad_recv_wc); - if (!mad_recv_wc) { - deref_mad_agent(mad_agent_priv); - return; - } - } - - /* Complete corresponding request */ - if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) { - spin_lock_irqsave(&mad_agent_priv->lock, flags); - mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); - if (!mad_send_wr) { - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) - && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class) - && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr) - & IB_MGMT_RMPP_FLAG_ACTIVE)) { - /* user rmpp is in effect - * and this is an active RMPP MAD - */ - mad_agent_priv->agent.recv_handler( - &mad_agent_priv->agent, NULL, - mad_recv_wc); - atomic_dec(&mad_agent_priv->refcount); - } else { - /* not user rmpp, revert to normal behavior and - * drop the mad */ - ib_free_recv_mad(mad_recv_wc); - deref_mad_agent(mad_agent_priv); - return; - } - } else { - ib_mark_mad_done(mad_send_wr); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - - /* Defined behavior is to complete response before request */ - mad_agent_priv->agent.recv_handler( - &mad_agent_priv->agent, - &mad_send_wr->send_buf, - mad_recv_wc); - atomic_dec(&mad_agent_priv->refcount); - - mad_send_wc.status = IB_WC_SUCCESS; - mad_send_wc.vendor_err = 0; - mad_send_wc.send_buf = &mad_send_wr->send_buf; - ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); - } - } else { - mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL, - mad_recv_wc); - deref_mad_agent(mad_agent_priv); - } -} - -static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv, - const struct ib_mad_qp_info *qp_info, - const struct ib_wc *wc, - int port_num, - struct ib_mad_private *recv, - struct ib_mad_private *response) -{ - enum smi_forward_action retsmi; - struct ib_smp *smp = (struct ib_smp *)recv->mad; - - if (smi_handle_dr_smp_recv(smp, - rdma_cap_ib_switch(port_priv->device), - port_num, - port_priv->device->phys_port_cnt) == - IB_SMI_DISCARD) - return IB_SMI_DISCARD; - - retsmi = smi_check_forward_dr_smp(smp); - if (retsmi == IB_SMI_LOCAL) - return IB_SMI_HANDLE; - - if (retsmi == IB_SMI_SEND) { /* don't 
forward */ - if (smi_handle_dr_smp_send(smp, - rdma_cap_ib_switch(port_priv->device), - port_num) == IB_SMI_DISCARD) - return IB_SMI_DISCARD; - - if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) - return IB_SMI_DISCARD; - } else if (rdma_cap_ib_switch(port_priv->device)) { - /* forward case for switches */ - memcpy(response, recv, mad_priv_size(response)); - response->header.recv_wc.wc = &response->header.wc; - response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; - response->header.recv_wc.recv_buf.grh = &response->grh; - - agent_send_response((const struct ib_mad_hdr *)response->mad, - &response->grh, wc, - port_priv->device, - smi_get_fwd_port(smp), - qp_info->qp->qp_num, - response->mad_size, - false); - - return IB_SMI_DISCARD; - } - return IB_SMI_HANDLE; -} - -static bool generate_unmatched_resp(const struct ib_mad_private *recv, - struct ib_mad_private *response, - size_t *resp_len, bool opa) -{ - const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad; - struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad; - - if (recv_hdr->method == IB_MGMT_METHOD_GET || - recv_hdr->method == IB_MGMT_METHOD_SET) { - memcpy(response, recv, mad_priv_size(response)); - response->header.recv_wc.wc = &response->header.wc; - response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; - response->header.recv_wc.recv_buf.grh = &response->grh; - resp_hdr->method = IB_MGMT_METHOD_GET_RESP; - resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); - if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - resp_hdr->status |= IB_SMP_DIRECTION; - - if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) { - if (recv_hdr->mgmt_class == - IB_MGMT_CLASS_SUBN_LID_ROUTED || - recv_hdr->mgmt_class == - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - *resp_len = opa_get_smp_header_size( - (const struct opa_smp *)recv->mad); - else - *resp_len = sizeof(struct ib_mad_hdr); - } - - return true; - } else { - return false; - } -} - -static enum smi_action -handle_opa_smi(struct ib_mad_port_private *port_priv, - struct ib_mad_qp_info *qp_info, - struct ib_wc *wc, - int port_num, - struct ib_mad_private *recv, - struct ib_mad_private *response) -{ - enum smi_forward_action retsmi; - struct opa_smp *smp = (struct opa_smp *)recv->mad; - - if (opa_smi_handle_dr_smp_recv(smp, - rdma_cap_ib_switch(port_priv->device), - port_num, - port_priv->device->phys_port_cnt) == - IB_SMI_DISCARD) - return IB_SMI_DISCARD; - - retsmi = opa_smi_check_forward_dr_smp(smp); - if (retsmi == IB_SMI_LOCAL) - return IB_SMI_HANDLE; - - if (retsmi == IB_SMI_SEND) { /* don't forward */ - if (opa_smi_handle_dr_smp_send(smp, - rdma_cap_ib_switch(port_priv->device), - port_num) == IB_SMI_DISCARD) - return IB_SMI_DISCARD; - - if (opa_smi_check_local_smp(smp, port_priv->device) == - IB_SMI_DISCARD) - return IB_SMI_DISCARD; - - } else if (rdma_cap_ib_switch(port_priv->device)) { - /* forward case for switches */ - memcpy(response, recv, mad_priv_size(response)); - response->header.recv_wc.wc = &response->header.wc; - response->header.recv_wc.recv_buf.opa_mad = - (struct opa_mad *)response->mad; - response->header.recv_wc.recv_buf.grh = &response->grh; - - agent_send_response((const struct ib_mad_hdr *)response->mad, - &response->grh, wc, - port_priv->device, - opa_smi_get_fwd_port(smp), - qp_info->qp->qp_num, - recv->header.wc.byte_len, - true); - - return IB_SMI_DISCARD; - } - - return IB_SMI_HANDLE; -} - -static enum smi_action -handle_smi(struct 
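/*
 * Note on generate_unmatched_resp() above: a GET or SET that no agent
 * claims is bounced back to the sender as a GET_RESP carrying the
 * "unsupported method/attribute" status, with the direction bit set
 * for directed-route SMPs; any other unmatched method is silently
 * dropped (the function returns false and the receive buffer is
 * simply reposted).
 */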
ib_mad_port_private *port_priv, - struct ib_mad_qp_info *qp_info, - struct ib_wc *wc, - int port_num, - struct ib_mad_private *recv, - struct ib_mad_private *response, - bool opa) -{ - struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad; - - if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION && - mad_hdr->class_version == OPA_SMI_CLASS_VERSION) - return handle_opa_smi(port_priv, qp_info, wc, port_num, recv, - response); - - return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response); -} - -static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct ib_mad_port_private *port_priv = cq->cq_context; - struct ib_mad_list_head *mad_list = - container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); - struct ib_mad_qp_info *qp_info; - struct ib_mad_private_header *mad_priv_hdr; - struct ib_mad_private *recv, *response = NULL; - struct ib_mad_agent_private *mad_agent; - int port_num; - int ret = IB_MAD_RESULT_SUCCESS; - size_t mad_size; - u16 resp_mad_pkey_index = 0; - bool opa; - - if (list_empty_careful(&port_priv->port_list)) - return; - - if (wc->status != IB_WC_SUCCESS) { - /* - * Receive errors indicate that the QP has entered the error - * state - error handling/shutdown code will cleanup - */ - return; - } - - qp_info = mad_list->mad_queue->qp_info; - dequeue_mad(mad_list); - - opa = rdma_cap_opa_mad(qp_info->port_priv->device, - qp_info->port_priv->port_num); - - mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, - mad_list); - recv = container_of(mad_priv_hdr, struct ib_mad_private, header); - ib_dma_unmap_single(port_priv->device, - recv->header.mapping, - mad_priv_dma_size(recv), - DMA_FROM_DEVICE); - - /* Setup MAD receive work completion from "normal" work completion */ - recv->header.wc = *wc; - recv->header.recv_wc.wc = &recv->header.wc; - - if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) { - recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh); - recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); - } else { - recv->header.recv_wc.mad_len = sizeof(struct ib_mad); - recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); - } - - recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad; - recv->header.recv_wc.recv_buf.grh = &recv->grh; - - if (atomic_read(&qp_info->snoop_count)) - snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS); - - /* Validate MAD */ - if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) - goto out; - - mad_size = recv->mad_size; - response = alloc_mad_private(mad_size, GFP_KERNEL); - if (!response) { - dev_err(&port_priv->device->dev, - "%s: no memory for response buffer\n", __func__); - goto out; - } - - if (rdma_cap_ib_switch(port_priv->device)) - port_num = wc->port_num; - else - port_num = port_priv->port_num; - - if (((struct ib_mad_hdr *)recv->mad)->mgmt_class == - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - if (handle_smi(port_priv, qp_info, wc, port_num, recv, - response, opa) - == IB_SMI_DISCARD) - goto out; - } - - /* Give driver "right of first refusal" on incoming MAD */ - if (port_priv->device->process_mad) { - ret = port_priv->device->process_mad(port_priv->device, 0, - port_priv->port_num, - wc, &recv->grh, - (const struct ib_mad_hdr *)recv->mad, - recv->mad_size, - (struct ib_mad_hdr *)response->mad, - &mad_size, &resp_mad_pkey_index); - - if (opa) - wc->pkey_index = resp_mad_pkey_index; - - if (ret & IB_MAD_RESULT_SUCCESS) { - if (ret & IB_MAD_RESULT_CONSUMED) - goto out; - if (ret & 
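/*
 * process_mad() return-flag protocol, as consumed here:
 * IB_MAD_RESULT_SUCCESS alone means the driver looked but the MAD is
 * not its business, so normal agent dispatch continues;
 * SUCCESS | IB_MAD_RESULT_CONSUMED means the driver swallowed it;
 * SUCCESS | IB_MAD_RESULT_REPLY, tested next, means the driver built
 * a reply in "response" for the MAD layer to send back.
 */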
IB_MAD_RESULT_REPLY) { - agent_send_response((const struct ib_mad_hdr *)response->mad, - &recv->grh, wc, - port_priv->device, - port_num, - qp_info->qp->qp_num, - mad_size, opa); - goto out; - } - } - } - - mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad); - if (mad_agent) { - ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); - /* - * recv is freed up in error cases in ib_mad_complete_recv - * or via recv_handler in ib_mad_complete_recv() - */ - recv = NULL; - } else if ((ret & IB_MAD_RESULT_SUCCESS) && - generate_unmatched_resp(recv, response, &mad_size, opa)) { - agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, - port_priv->device, port_num, - qp_info->qp->qp_num, mad_size, opa); - } - -out: - /* Post another receive request for this QP */ - if (response) { - ib_mad_post_receive_mads(qp_info, response); - kfree(recv); - } else - ib_mad_post_receive_mads(qp_info, recv); -} - -static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) -{ - struct ib_mad_send_wr_private *mad_send_wr; - unsigned long delay; - - if (list_empty(&mad_agent_priv->wait_list)) { - cancel_delayed_work(&mad_agent_priv->timed_work); - } else { - mad_send_wr = list_entry(mad_agent_priv->wait_list.next, - struct ib_mad_send_wr_private, - agent_list); - - if (time_after(mad_agent_priv->timeout, - mad_send_wr->timeout)) { - mad_agent_priv->timeout = mad_send_wr->timeout; - delay = mad_send_wr->timeout - jiffies; - if ((long)delay <= 0) - delay = 1; - mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, - &mad_agent_priv->timed_work, delay); - } - } -} - -static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) -{ - struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_wr_private *temp_mad_send_wr; - struct list_head *list_item; - unsigned long delay; - - mad_agent_priv = mad_send_wr->mad_agent_priv; - list_del(&mad_send_wr->agent_list); - - delay = mad_send_wr->timeout; - mad_send_wr->timeout += jiffies; - - if (delay) { - list_for_each_prev(list_item, &mad_agent_priv->wait_list) { - temp_mad_send_wr = list_entry(list_item, - struct ib_mad_send_wr_private, - agent_list); - if (time_after(mad_send_wr->timeout, - temp_mad_send_wr->timeout)) - break; - } - } - else - list_item = &mad_agent_priv->wait_list; - list_add(&mad_send_wr->agent_list, list_item); - - /* Reschedule a work item if we have a shorter timeout */ - if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) - mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, - &mad_agent_priv->timed_work, delay); -} - -void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, - int timeout_ms) -{ - mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); - wait_for_response(mad_send_wr); -} - -/* - * Process a send work completion - */ -void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, - struct ib_mad_send_wc *mad_send_wc) -{ - struct ib_mad_agent_private *mad_agent_priv; - unsigned long flags; - int ret; - - mad_agent_priv = mad_send_wr->mad_agent_priv; - spin_lock_irqsave(&mad_agent_priv->lock, flags); - if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { - ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc); - if (ret == IB_RMPP_RESULT_CONSUMED) - goto done; - } else - ret = IB_RMPP_RESULT_UNHANDLED; - - if (mad_send_wc->status != IB_WC_SUCCESS && - mad_send_wr->status == IB_WC_SUCCESS) { - mad_send_wr->status = mad_send_wc->status; - mad_send_wr->refcount -= (mad_send_wr->timeout > 0); - } - - if 
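/*
 * Reference accounting at this exact step: ib_post_send_mad() started
 * the WR at 1 + (timeout > 0) references -- one for the send itself,
 * one for an expected response. The decrement below releases the send
 * reference; if a response is still outstanding, the request parks on
 * the wait list via wait_for_response() instead of completing.
 */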
(--mad_send_wr->refcount > 0) { - if (mad_send_wr->refcount == 1 && mad_send_wr->timeout && - mad_send_wr->status == IB_WC_SUCCESS) { - wait_for_response(mad_send_wr); - } - goto done; - } - - /* Remove send from MAD agent and notify client of completion */ - list_del(&mad_send_wr->agent_list); - adjust_timeout(mad_agent_priv); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - - if (mad_send_wr->status != IB_WC_SUCCESS ) - mad_send_wc->status = mad_send_wr->status; - if (ret == IB_RMPP_RESULT_INTERNAL) - ib_rmpp_send_handler(mad_send_wc); - else - mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, - mad_send_wc); - - /* Release reference on agent taken when sending */ - deref_mad_agent(mad_agent_priv); - return; -done: - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); -} - -static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct ib_mad_port_private *port_priv = cq->cq_context; - struct ib_mad_list_head *mad_list = - container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); - struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr; - struct ib_mad_qp_info *qp_info; - struct ib_mad_queue *send_queue; - struct ib_send_wr *bad_send_wr; - struct ib_mad_send_wc mad_send_wc; - unsigned long flags; - int ret; - - if (list_empty_careful(&port_priv->port_list)) - return; - - if (wc->status != IB_WC_SUCCESS) { - if (!ib_mad_send_error(port_priv, wc)) - return; - } - - mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, - mad_list); - send_queue = mad_list->mad_queue; - qp_info = send_queue->qp_info; - -retry: - ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, - mad_send_wr->header_mapping, - mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); - ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, - mad_send_wr->payload_mapping, - mad_send_wr->sg_list[1].length, DMA_TO_DEVICE); - queued_send_wr = NULL; - spin_lock_irqsave(&send_queue->lock, flags); - list_del(&mad_list->list); - - /* Move queued send to the send queue */ - if (send_queue->count-- > send_queue->max_active) { - mad_list = container_of(qp_info->overflow_list.next, - struct ib_mad_list_head, list); - queued_send_wr = container_of(mad_list, - struct ib_mad_send_wr_private, - mad_list); - list_move_tail(&mad_list->list, &send_queue->list); - } - spin_unlock_irqrestore(&send_queue->lock, flags); - - mad_send_wc.send_buf = &mad_send_wr->send_buf; - mad_send_wc.status = wc->status; - mad_send_wc.vendor_err = wc->vendor_err; - if (atomic_read(&qp_info->snoop_count)) - snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc, - IB_MAD_SNOOP_SEND_COMPLETIONS); - ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); - - if (queued_send_wr) { - ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, - &bad_send_wr); - if (ret) { - dev_err(&port_priv->device->dev, - "ib_post_send failed: %d\n", ret); - mad_send_wr = queued_send_wr; - wc->status = IB_WC_LOC_QP_OP_ERR; - goto retry; - } - } -} - -static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info) -{ - struct ib_mad_send_wr_private *mad_send_wr; - struct ib_mad_list_head *mad_list; - unsigned long flags; - - spin_lock_irqsave(&qp_info->send_queue.lock, flags); - list_for_each_entry(mad_list, &qp_info->send_queue.list, list) { - mad_send_wr = container_of(mad_list, - struct ib_mad_send_wr_private, - mad_list); - mad_send_wr->retry = 1; - } - spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); -} - -static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, - struct ib_wc *wc) -{ - struct 
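/*
 * Flow-control note for ib_mad_send_done() above: the hardware send
 * queue is capped at send_queue->max_active entries, with the excess
 * parked on qp_info->overflow_list; each completion moves at most one
 * parked request onto the send queue and posts it, and a failed
 * ib_post_send() is converted into a synthetic IB_WC_LOC_QP_OP_ERR
 * completion via the retry label.
 */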
ib_mad_list_head *mad_list = - container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); - struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info; - struct ib_mad_send_wr_private *mad_send_wr; - int ret; - - /* - * Send errors will transition the QP to SQE - move - * QP to RTS and repost flushed work requests - */ - mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, - mad_list); - if (wc->status == IB_WC_WR_FLUSH_ERR) { - if (mad_send_wr->retry) { - /* Repost send */ - struct ib_send_wr *bad_send_wr; - - mad_send_wr->retry = 0; - ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, - &bad_send_wr); - if (!ret) - return false; - } - } else { - struct ib_qp_attr *attr; - - /* Transition QP to RTS and fail offending send */ - attr = kmalloc(sizeof *attr, GFP_KERNEL); - if (attr) { - attr->qp_state = IB_QPS_RTS; - attr->cur_qp_state = IB_QPS_SQE; - ret = ib_modify_qp(qp_info->qp, attr, - IB_QP_STATE | IB_QP_CUR_STATE); - kfree(attr); - if (ret) - dev_err(&port_priv->device->dev, - "%s - ib_modify_qp to RTS: %d\n", - __func__, ret); - else - mark_sends_for_retry(qp_info); - } - } - - return true; -} - -static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) -{ - unsigned long flags; - struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr; - struct ib_mad_send_wc mad_send_wc; - struct list_head cancel_list; - - INIT_LIST_HEAD(&cancel_list); - - spin_lock_irqsave(&mad_agent_priv->lock, flags); - list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, - &mad_agent_priv->send_list, agent_list) { - if (mad_send_wr->status == IB_WC_SUCCESS) { - mad_send_wr->status = IB_WC_WR_FLUSH_ERR; - mad_send_wr->refcount -= (mad_send_wr->timeout > 0); - } - } - - /* Empty wait list to prevent receives from finding a request */ - list_splice_init(&mad_agent_priv->wait_list, &cancel_list); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - - /* Report all cancelled requests */ - mad_send_wc.status = IB_WC_WR_FLUSH_ERR; - mad_send_wc.vendor_err = 0; - - list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, - &cancel_list, agent_list) { - mad_send_wc.send_buf = &mad_send_wr->send_buf; - list_del(&mad_send_wr->agent_list); - mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, - &mad_send_wc); - atomic_dec(&mad_agent_priv->refcount); - } -} - -static struct ib_mad_send_wr_private* -find_send_wr(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_send_buf *send_buf) -{ - struct ib_mad_send_wr_private *mad_send_wr; - - list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, - agent_list) { - if (&mad_send_wr->send_buf == send_buf) - return mad_send_wr; - } - - list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, - agent_list) { - if (is_rmpp_data_mad(mad_agent_priv, - mad_send_wr->send_buf.mad) && - &mad_send_wr->send_buf == send_buf) - return mad_send_wr; - } - return NULL; -} - -int ib_modify_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, u32 timeout_ms) -{ - struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_wr_private *mad_send_wr; - unsigned long flags; - int active; - - mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, - agent); - spin_lock_irqsave(&mad_agent_priv->lock, flags); - mad_send_wr = find_send_wr(mad_agent_priv, send_buf); - if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - return -EINVAL; - } - - active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1); - if (!timeout_ms) { - 
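/*
 * A timeout of zero is the cancel case: the WR is marked
 * IB_WC_WR_FLUSH_ERR here and then completes back to the client
 * through the timeout machinery. ib_cancel_mad() below is exactly
 * this call with timeout_ms == 0:
 *
 *	ib_cancel_mad(mad_agent, send_buf);
 */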
mad_send_wr->status = IB_WC_WR_FLUSH_ERR; - mad_send_wr->refcount -= (mad_send_wr->timeout > 0); - } - - mad_send_wr->send_buf.timeout_ms = timeout_ms; - if (active) - mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); - else - ib_reset_mad_timeout(mad_send_wr, timeout_ms); - - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - return 0; -} -EXPORT_SYMBOL(ib_modify_mad); - -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf) -{ - ib_modify_mad(mad_agent, send_buf, 0); -} -EXPORT_SYMBOL(ib_cancel_mad); - -static void local_completions(struct work_struct *work) -{ - struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_local_private *local; - struct ib_mad_agent_private *recv_mad_agent; - unsigned long flags; - int free_mad; - struct ib_wc wc; - struct ib_mad_send_wc mad_send_wc; - bool opa; - - mad_agent_priv = - container_of(work, struct ib_mad_agent_private, local_work); - - opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, - mad_agent_priv->qp_info->port_priv->port_num); - - spin_lock_irqsave(&mad_agent_priv->lock, flags); - while (!list_empty(&mad_agent_priv->local_list)) { - local = list_entry(mad_agent_priv->local_list.next, - struct ib_mad_local_private, - completion_list); - list_del(&local->completion_list); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - free_mad = 0; - if (local->mad_priv) { - u8 base_version; - recv_mad_agent = local->recv_mad_agent; - if (!recv_mad_agent) { - dev_err(&mad_agent_priv->agent.device->dev, - "No receive MAD agent for local completion\n"); - free_mad = 1; - goto local_send_completion; - } - - /* - * Defined behavior is to complete response - * before request - */ - build_smp_wc(recv_mad_agent->agent.qp, - local->mad_send_wr->send_wr.wr.wr_cqe, - be16_to_cpu(IB_LID_PERMISSIVE), - local->mad_send_wr->send_wr.pkey_index, - recv_mad_agent->agent.port_num, &wc); - - local->mad_priv->header.recv_wc.wc = &wc; - - base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version; - if (opa && base_version == OPA_MGMT_BASE_VERSION) { - local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len; - local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); - } else { - local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad); - local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); - } - - INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list); - list_add(&local->mad_priv->header.recv_wc.recv_buf.list, - &local->mad_priv->header.recv_wc.rmpp_list); - local->mad_priv->header.recv_wc.recv_buf.grh = NULL; - local->mad_priv->header.recv_wc.recv_buf.mad = - (struct ib_mad *)local->mad_priv->mad; - if (atomic_read(&recv_mad_agent->qp_info->snoop_count)) - snoop_recv(recv_mad_agent->qp_info, - &local->mad_priv->header.recv_wc, - IB_MAD_SNOOP_RECVS); - recv_mad_agent->agent.recv_handler( - &recv_mad_agent->agent, - &local->mad_send_wr->send_buf, - &local->mad_priv->header.recv_wc); - spin_lock_irqsave(&recv_mad_agent->lock, flags); - atomic_dec(&recv_mad_agent->refcount); - spin_unlock_irqrestore(&recv_mad_agent->lock, flags); - } - -local_send_completion: - /* Complete send */ - mad_send_wc.status = IB_WC_SUCCESS; - mad_send_wc.vendor_err = 0; - mad_send_wc.send_buf = &local->mad_send_wr->send_buf; - if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) - snoop_send(mad_agent_priv->qp_info, - &local->mad_send_wr->send_buf, - &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); - mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, - 
&mad_send_wc); - - spin_lock_irqsave(&mad_agent_priv->lock, flags); - atomic_dec(&mad_agent_priv->refcount); - if (free_mad) - kfree(local->mad_priv); - kfree(local); - } - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); -} - -static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) -{ - int ret; - - if (!mad_send_wr->retries_left) - return -ETIMEDOUT; - - mad_send_wr->retries_left--; - mad_send_wr->send_buf.retries++; - - mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); - - if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) { - ret = ib_retry_rmpp(mad_send_wr); - switch (ret) { - case IB_RMPP_RESULT_UNHANDLED: - ret = ib_send_mad(mad_send_wr); - break; - case IB_RMPP_RESULT_CONSUMED: - ret = 0; - break; - default: - ret = -ECOMM; - break; - } - } else - ret = ib_send_mad(mad_send_wr); - - if (!ret) { - mad_send_wr->refcount++; - list_add_tail(&mad_send_wr->agent_list, - &mad_send_wr->mad_agent_priv->send_list); - } - return ret; -} - -static void timeout_sends(struct work_struct *work) -{ - struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_wr_private *mad_send_wr; - struct ib_mad_send_wc mad_send_wc; - unsigned long flags, delay; - - mad_agent_priv = container_of(work, struct ib_mad_agent_private, - timed_work.work); - mad_send_wc.vendor_err = 0; - - spin_lock_irqsave(&mad_agent_priv->lock, flags); - while (!list_empty(&mad_agent_priv->wait_list)) { - mad_send_wr = list_entry(mad_agent_priv->wait_list.next, - struct ib_mad_send_wr_private, - agent_list); - - if (time_after(mad_send_wr->timeout, jiffies)) { - delay = mad_send_wr->timeout - jiffies; - if ((long)delay <= 0) - delay = 1; - queue_delayed_work(mad_agent_priv->qp_info-> - port_priv->wq, - &mad_agent_priv->timed_work, delay); - break; - } - - list_del(&mad_send_wr->agent_list); - if (mad_send_wr->status == IB_WC_SUCCESS && - !retry_send(mad_send_wr)) - continue; - - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - - if (mad_send_wr->status == IB_WC_SUCCESS) - mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; - else - mad_send_wc.status = mad_send_wr->status; - mad_send_wc.send_buf = &mad_send_wr->send_buf; - mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, - &mad_send_wc); - - atomic_dec(&mad_agent_priv->refcount); - spin_lock_irqsave(&mad_agent_priv->lock, flags); - } - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); -} - -/* - * Allocate receive MADs and post receive WRs for them - */ -static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, - struct ib_mad_private *mad) -{ - unsigned long flags; - int post, ret; - struct ib_mad_private *mad_priv; - struct ib_sge sg_list; - struct ib_recv_wr recv_wr, *bad_recv_wr; - struct ib_mad_queue *recv_queue = &qp_info->recv_queue; - - /* Initialize common scatter list fields */ - sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey; - - /* Initialize common receive WR fields */ - recv_wr.next = NULL; - recv_wr.sg_list = &sg_list; - recv_wr.num_sge = 1; - - do { - /* Allocate and map receive buffer */ - if (mad) { - mad_priv = mad; - mad = NULL; - } else { - mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv), - GFP_ATOMIC); - if (!mad_priv) { - dev_err(&qp_info->port_priv->device->dev, - "No memory for receive buffer\n"); - ret = -ENOMEM; - break; - } - } - sg_list.length = mad_priv_dma_size(mad_priv); - sg_list.addr = ib_dma_map_single(qp_info->port_priv->device, - &mad_priv->grh, - mad_priv_dma_size(mad_priv), - DMA_FROM_DEVICE); - if 
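/*
 * (Retry accounting recap for retry_send() above: ib_post_send_mad()
 * moved the caller's send_buf->retries budget into max_retries and
 * retries_left and zeroed the field; each retransmission then
 * increments send_buf->retries again, so on completion the field
 * reports how many retries were actually spent.)
 */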
(unlikely(ib_dma_mapping_error(qp_info->port_priv->device, - sg_list.addr))) { - ret = -ENOMEM; - break; - } - mad_priv->header.mapping = sg_list.addr; - mad_priv->header.mad_list.mad_queue = recv_queue; - mad_priv->header.mad_list.cqe.done = ib_mad_recv_done; - recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe; - - /* Post receive WR */ - spin_lock_irqsave(&recv_queue->lock, flags); - post = (++recv_queue->count < recv_queue->max_active); - list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list); - spin_unlock_irqrestore(&recv_queue->lock, flags); - ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr); - if (ret) { - spin_lock_irqsave(&recv_queue->lock, flags); - list_del(&mad_priv->header.mad_list.list); - recv_queue->count--; - spin_unlock_irqrestore(&recv_queue->lock, flags); - ib_dma_unmap_single(qp_info->port_priv->device, - mad_priv->header.mapping, - mad_priv_dma_size(mad_priv), - DMA_FROM_DEVICE); - kfree(mad_priv); - dev_err(&qp_info->port_priv->device->dev, - "ib_post_recv failed: %d\n", ret); - break; - } - } while (post); - - return ret; -} - -/* - * Return all the posted receive MADs - */ -static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) -{ - struct ib_mad_private_header *mad_priv_hdr; - struct ib_mad_private *recv; - struct ib_mad_list_head *mad_list; - - if (!qp_info->qp) - return; - - while (!list_empty(&qp_info->recv_queue.list)) { - - mad_list = list_entry(qp_info->recv_queue.list.next, - struct ib_mad_list_head, list); - mad_priv_hdr = container_of(mad_list, - struct ib_mad_private_header, - mad_list); - recv = container_of(mad_priv_hdr, struct ib_mad_private, - header); - - /* Remove from posted receive MAD list */ - list_del(&mad_list->list); - - ib_dma_unmap_single(qp_info->port_priv->device, - recv->header.mapping, - mad_priv_dma_size(recv), - DMA_FROM_DEVICE); - kfree(recv); - } - - qp_info->recv_queue.count = 0; -} - -/* - * Start the port - */ -static int ib_mad_port_start(struct ib_mad_port_private *port_priv) -{ - int ret, i; - struct ib_qp_attr *attr; - struct ib_qp *qp; - u16 pkey_index; - - attr = kmalloc(sizeof *attr, GFP_KERNEL); - if (!attr) { - dev_err(&port_priv->device->dev, - "Couldn't kmalloc ib_qp_attr\n"); - return -ENOMEM; - } - - ret = ib_find_pkey(port_priv->device, port_priv->port_num, - IB_DEFAULT_PKEY_FULL, &pkey_index); - if (ret) - pkey_index = 0; - - for (i = 0; i < IB_MAD_QPS_CORE; i++) { - qp = port_priv->qp_info[i].qp; - if (!qp) - continue; - - /* - * PKey index for QP1 is irrelevant but - * one is needed for the Reset to Init transition - */ - attr->qp_state = IB_QPS_INIT; - attr->pkey_index = pkey_index; - attr->qkey = (qp->qp_num == 0) ? 
0 : IB_QP1_QKEY; - ret = ib_modify_qp(qp, attr, IB_QP_STATE | - IB_QP_PKEY_INDEX | IB_QP_QKEY); - if (ret) { - dev_err(&port_priv->device->dev, - "Couldn't change QP%d state to INIT: %d\n", - i, ret); - goto out; - } - - attr->qp_state = IB_QPS_RTR; - ret = ib_modify_qp(qp, attr, IB_QP_STATE); - if (ret) { - dev_err(&port_priv->device->dev, - "Couldn't change QP%d state to RTR: %d\n", - i, ret); - goto out; - } - - attr->qp_state = IB_QPS_RTS; - attr->sq_psn = IB_MAD_SEND_Q_PSN; - ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN); - if (ret) { - dev_err(&port_priv->device->dev, - "Couldn't change QP%d state to RTS: %d\n", - i, ret); - goto out; - } - } - - ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); - if (ret) { - dev_err(&port_priv->device->dev, - "Failed to request completion notification: %d\n", - ret); - goto out; - } - - for (i = 0; i < IB_MAD_QPS_CORE; i++) { - if (!port_priv->qp_info[i].qp) - continue; - - ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL); - if (ret) { - dev_err(&port_priv->device->dev, - "Couldn't post receive WRs\n"); - goto out; - } - } -out: - kfree(attr); - return ret; -} - -static void qp_event_handler(struct ib_event *event, void *qp_context) -{ - struct ib_mad_qp_info *qp_info = qp_context; - - /* It's worse than that! He's dead, Jim! */ - dev_err(&qp_info->port_priv->device->dev, - "Fatal error (%d) on MAD QP (%d)\n", - event->event, qp_info->qp->qp_num); -} - -static void init_mad_queue(struct ib_mad_qp_info *qp_info, - struct ib_mad_queue *mad_queue) -{ - mad_queue->qp_info = qp_info; - mad_queue->count = 0; - spin_lock_init(&mad_queue->lock); - INIT_LIST_HEAD(&mad_queue->list); -} - -static void init_mad_qp(struct ib_mad_port_private *port_priv, - struct ib_mad_qp_info *qp_info) -{ - qp_info->port_priv = port_priv; - init_mad_queue(qp_info, &qp_info->send_queue); - init_mad_queue(qp_info, &qp_info->recv_queue); - INIT_LIST_HEAD(&qp_info->overflow_list); - spin_lock_init(&qp_info->snoop_lock); - qp_info->snoop_table = NULL; - qp_info->snoop_table_size = 0; - atomic_set(&qp_info->snoop_count, 0); -} - -static int create_mad_qp(struct ib_mad_qp_info *qp_info, - enum ib_qp_type qp_type) -{ - struct ib_qp_init_attr qp_init_attr; - int ret; - - memset(&qp_init_attr, 0, sizeof qp_init_attr); - qp_init_attr.send_cq = qp_info->port_priv->cq; - qp_init_attr.recv_cq = qp_info->port_priv->cq; - qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.cap.max_send_wr = mad_sendq_size; - qp_init_attr.cap.max_recv_wr = mad_recvq_size; - qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG; - qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG; - qp_init_attr.qp_type = qp_type; - qp_init_attr.port_num = qp_info->port_priv->port_num; - qp_init_attr.qp_context = qp_info; - qp_init_attr.event_handler = qp_event_handler; - qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr); - if (IS_ERR(qp_info->qp)) { - dev_err(&qp_info->port_priv->device->dev, - "Couldn't create ib_mad QP%d\n", - get_spl_qp_index(qp_type)); - ret = PTR_ERR(qp_info->qp); - goto error; - } - /* Use minimum queue sizes unless the CQ is resized */ - qp_info->send_queue.max_active = mad_sendq_size; - qp_info->recv_queue.max_active = mad_recvq_size; - return 0; - -error: - return ret; -} - -static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) -{ - if (!qp_info->qp) - return; - - ib_destroy_qp(qp_info->qp); - kfree(qp_info->snoop_table); -} - -/* - * Open the port - * Create the QP, PD, MR, and CQ if needed - */ -static int ib_mad_port_open(struct 
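/*
 * Bring-up note for ib_mad_port_start() above: the special QPs skip
 * the address/path setup a normal QP needs, so the transitions are
 * just INIT (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY, with QP0
 * taking qkey 0 and QP1 the well-known GSI qkey IB_QP1_QKEY), then
 * RTR (IB_QP_STATE alone), then RTS (IB_QP_STATE | IB_QP_SQ_PSN).
 */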
ib_device *device, - int port_num) -{ - int ret, cq_size; - struct ib_mad_port_private *port_priv; - unsigned long flags; - char name[sizeof "ib_mad123"]; - int has_smi; - - if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) - return -EFAULT; - - if (WARN_ON(rdma_cap_opa_mad(device, port_num) && - rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE)) - return -EFAULT; - - /* Create new device info */ - port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); - if (!port_priv) { - dev_err(&device->dev, "No memory for ib_mad_port_private\n"); - return -ENOMEM; - } - - port_priv->device = device; - port_priv->port_num = port_num; - spin_lock_init(&port_priv->reg_lock); - INIT_LIST_HEAD(&port_priv->agent_list); - init_mad_qp(port_priv, &port_priv->qp_info[0]); - init_mad_qp(port_priv, &port_priv->qp_info[1]); - - cq_size = mad_sendq_size + mad_recvq_size; - has_smi = rdma_cap_ib_smi(device, port_num); - if (has_smi) - cq_size *= 2; - - port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, - IB_POLL_WORKQUEUE); - if (IS_ERR(port_priv->cq)) { - dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); - ret = PTR_ERR(port_priv->cq); - goto error3; - } - - port_priv->pd = ib_alloc_pd(device, 0); - if (IS_ERR(port_priv->pd)) { - dev_err(&device->dev, "Couldn't create ib_mad PD\n"); - ret = PTR_ERR(port_priv->pd); - goto error4; - } - - if (has_smi) { - ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); - if (ret) - goto error6; - } - ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); - if (ret) - goto error7; - - snprintf(name, sizeof name, "ib_mad%d", port_num); - port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); - if (!port_priv->wq) { - ret = -ENOMEM; - goto error8; - } - - spin_lock_irqsave(&ib_mad_port_list_lock, flags); - list_add_tail(&port_priv->port_list, &ib_mad_port_list); - spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); - - ret = ib_mad_port_start(port_priv); - if (ret) { - dev_err(&device->dev, "Couldn't start port\n"); - goto error9; - } - - return 0; - -error9: - spin_lock_irqsave(&ib_mad_port_list_lock, flags); - list_del_init(&port_priv->port_list); - spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); - - destroy_workqueue(port_priv->wq); -error8: - destroy_mad_qp(&port_priv->qp_info[1]); -error7: - destroy_mad_qp(&port_priv->qp_info[0]); -error6: - ib_dealloc_pd(port_priv->pd); -error4: - ib_free_cq(port_priv->cq); - cleanup_recv_queue(&port_priv->qp_info[1]); - cleanup_recv_queue(&port_priv->qp_info[0]); -error3: - kfree(port_priv); - - return ret; -} - -/* - * Close the port - * If there are no classes using the port, free the port - * resources (CQ, MR, PD, QP) and remove the port's info structure - */ -static int ib_mad_port_close(struct ib_device *device, int port_num) -{ - struct ib_mad_port_private *port_priv; - unsigned long flags; - - spin_lock_irqsave(&ib_mad_port_list_lock, flags); - port_priv = __ib_get_mad_port(device, port_num); - if (port_priv == NULL) { - spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); - dev_err(&device->dev, "Port %d not found\n", port_num); - return -ENODEV; - } - list_del_init(&port_priv->port_list); - spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); - - destroy_workqueue(port_priv->wq); - destroy_mad_qp(&port_priv->qp_info[1]); - destroy_mad_qp(&port_priv->qp_info[0]); - ib_dealloc_pd(port_priv->pd); - ib_free_cq(port_priv->cq); - cleanup_recv_queue(&port_priv->qp_info[1]); - cleanup_recv_queue(&port_priv->qp_info[0]); - /* XXX: Handle deallocation of MAD 
registration tables */ - - kfree(port_priv); - - return 0; -} - -static void ib_mad_init_device(struct ib_device *device) -{ - int start, i; - - start = rdma_start_port(device); - - for (i = start; i <= rdma_end_port(device); i++) { - if (!rdma_cap_ib_mad(device, i)) - continue; - - if (ib_mad_port_open(device, i)) { - dev_err(&device->dev, "Couldn't open port %d\n", i); - goto error; - } - if (ib_agent_port_open(device, i)) { - dev_err(&device->dev, - "Couldn't open port %d for agents\n", i); - goto error_agent; - } - } - return; - -error_agent: - if (ib_mad_port_close(device, i)) - dev_err(&device->dev, "Couldn't close port %d\n", i); - -error: - while (--i >= start) { - if (!rdma_cap_ib_mad(device, i)) - continue; - - if (ib_agent_port_close(device, i)) - dev_err(&device->dev, - "Couldn't close port %d for agents\n", i); - if (ib_mad_port_close(device, i)) - dev_err(&device->dev, "Couldn't close port %d\n", i); - } -} - -static void ib_mad_remove_device(struct ib_device *device, void *client_data) -{ - int i; - - for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { - if (!rdma_cap_ib_mad(device, i)) - continue; - - if (ib_agent_port_close(device, i)) - dev_err(&device->dev, - "Couldn't close port %d for agents\n", i); - if (ib_mad_port_close(device, i)) - dev_err(&device->dev, "Couldn't close port %d\n", i); - } -} - -static struct ib_client mad_client = { - .name = "mad", - .add = ib_mad_init_device, - .remove = ib_mad_remove_device -}; - -int ib_mad_init(void) -{ - mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); - mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); - - mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE); - mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE); - - INIT_LIST_HEAD(&ib_mad_port_list); - - if (ib_register_client(&mad_client)) { - pr_err("Couldn't register ib_mad client\n"); - return -EINVAL; - } - - return 0; -} - -void ib_mad_cleanup(void) -{ - ib_unregister_client(&mad_client); -} Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/mad.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/iwcm.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/iwcm.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/iwcm.c (nonexistent) @@ -1,1050 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "iwcm.h" - -MODULE_AUTHOR("Tom Tucker"); -MODULE_DESCRIPTION("iWARP CM"); -MODULE_LICENSE("Dual BSD/GPL"); - -static struct workqueue_struct *iwcm_wq; -struct iwcm_work { - struct work_struct work; - struct iwcm_id_private *cm_id; - struct list_head list; - struct iw_cm_event event; - struct list_head free_list; -}; - -static unsigned int default_backlog = 256; - -/* - * The following services provide a mechanism for pre-allocating iwcm_work - * elements. The design pre-allocates them based on the cm_id type: - * LISTENING IDS: Get enough elements preallocated to handle the - * listen backlog. - * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE - * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE - * - * Allocating them in connect and listen avoids having to deal - * with allocation failures on the event upcall from the provider (which - * is called in the interrupt context). - * - * One exception is when creating the cm_id for incoming connection requests. - * There are two cases: - * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If - * the backlog is exceeded, then no more connection request events will - * be processed. cm_event_handler() returns -ENOMEM in this case. Its up - * to the provider to reject the connection request. - * 2) in the connection request workqueue handler, cm_conn_req_handler(). - * If work elements cannot be allocated for the new connect request cm_id, - * then IWCM will call the provider reject method. This is ok since - * cm_conn_req_handler() runs in the workqueue thread context. 
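(To make the constraint concrete: a provider delivers events through
the handler installed by iw_create_cm_id() below, roughly as in this
sketch, where the event contents are illustrative:

	struct iw_cm_event event = {
		.event = IW_CM_EVENT_CONNECT_REQUEST,
	};
	ret = cm_id->event_handler(cm_id, &event);

and a -ENOMEM return tells the provider that the listen backlog is
exhausted and the request must be rejected on its side.)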
- */ - -static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) -{ - struct iwcm_work *work; - - if (list_empty(&cm_id_priv->work_free_list)) - return NULL; - work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, - free_list); - list_del_init(&work->free_list); - return work; -} - -static void put_work(struct iwcm_work *work) -{ - list_add(&work->free_list, &work->cm_id->work_free_list); -} - -static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) -{ - struct list_head *e, *tmp; - - list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) - kfree(list_entry(e, struct iwcm_work, free_list)); -} - -static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) -{ - struct iwcm_work *work; - - BUG_ON(!list_empty(&cm_id_priv->work_free_list)); - while (count--) { - work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); - if (!work) { - dealloc_work_entries(cm_id_priv); - return -ENOMEM; - } - work->cm_id = cm_id_priv; - INIT_LIST_HEAD(&work->list); - put_work(work); - } - return 0; -} - -/* - * Save private data from incoming connection requests to - * iw_cm_event, so the low level driver doesn't have to. Adjust - * the event ptr to point to the local copy. - */ -static int copy_private_data(struct iw_cm_event *event) -{ - void *p; - - p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); - if (!p) - return -ENOMEM; - event->private_data = p; - return 0; -} - -static void free_cm_id(struct iwcm_id_private *cm_id_priv) -{ - dealloc_work_entries(cm_id_priv); - kfree(cm_id_priv); -} - -/* - * Release a reference on cm_id. If the last reference is being - * released, free the cm_id and return 1. - */ -static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) -{ - BUG_ON(atomic_read(&cm_id_priv->refcount)==0); - if (atomic_dec_and_test(&cm_id_priv->refcount)) { - BUG_ON(!list_empty(&cm_id_priv->work_list)); - free_cm_id(cm_id_priv); - return 1; - } - - return 0; -} - -static void add_ref(struct iw_cm_id *cm_id) -{ - struct iwcm_id_private *cm_id_priv; - cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - atomic_inc(&cm_id_priv->refcount); -} - -static void rem_ref(struct iw_cm_id *cm_id) -{ - struct iwcm_id_private *cm_id_priv; - - cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - - (void)iwcm_deref_id(cm_id_priv); -} - -static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); - -struct iw_cm_id *iw_create_cm_id(struct ib_device *device, - iw_cm_handler cm_handler, - void *context) -{ - struct iwcm_id_private *cm_id_priv; - - cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); - if (!cm_id_priv) - return ERR_PTR(-ENOMEM); - - cm_id_priv->state = IW_CM_STATE_IDLE; - cm_id_priv->id.device = device; - cm_id_priv->id.cm_handler = cm_handler; - cm_id_priv->id.context = context; - cm_id_priv->id.event_handler = cm_event_handler; - cm_id_priv->id.add_ref = add_ref; - cm_id_priv->id.rem_ref = rem_ref; - spin_lock_init(&cm_id_priv->lock); - atomic_set(&cm_id_priv->refcount, 1); - init_waitqueue_head(&cm_id_priv->connect_wait); - init_completion(&cm_id_priv->destroy_comp); - INIT_LIST_HEAD(&cm_id_priv->work_list); - INIT_LIST_HEAD(&cm_id_priv->work_free_list); - - return &cm_id_priv->id; -} -EXPORT_SYMBOL(iw_create_cm_id); - - -static int iwcm_modify_qp_err(struct ib_qp *qp) -{ - struct ib_qp_attr qp_attr; - - if (!qp) - return -EINVAL; - - qp_attr.qp_state = IB_QPS_ERR; - return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); -} - -/* - * This is really the RDMAC CLOSING state. 
It is most similar to the - * IB SQD QP state. - */ -static int iwcm_modify_qp_sqd(struct ib_qp *qp) -{ - struct ib_qp_attr qp_attr; - - BUG_ON(qp == NULL); - qp_attr.qp_state = IB_QPS_SQD; - return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); -} - -/* - * CM_ID <-- CLOSING - * - * Block if a passive or active connection is currently being processed. Then - * process the event as follows: - * - If we are ESTABLISHED, move to CLOSING and modify the QP state - * based on the abrupt flag - * - If the connection is already in the CLOSING or IDLE state, the peer is - * disconnecting concurrently with us and we've already seen the - * DISCONNECT event -- ignore the request and return 0 - * - Disconnect on a listening endpoint returns -EINVAL - */ -int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) -{ - struct iwcm_id_private *cm_id_priv; - unsigned long flags; - int ret = 0; - struct ib_qp *qp = NULL; - - cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - /* Wait if we're currently in a connect or accept downcall */ - wait_event(cm_id_priv->connect_wait, - !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); - - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch (cm_id_priv->state) { - case IW_CM_STATE_ESTABLISHED: - cm_id_priv->state = IW_CM_STATE_CLOSING; - - /* QP could be for user-mode client */ - if (cm_id_priv->qp) - qp = cm_id_priv->qp; - else - ret = -EINVAL; - break; - case IW_CM_STATE_LISTEN: - ret = -EINVAL; - break; - case IW_CM_STATE_CLOSING: - /* remote peer closed first */ - case IW_CM_STATE_IDLE: - /* accept or connect returned !0 */ - break; - case IW_CM_STATE_CONN_RECV: - /* - * App called disconnect before/without calling accept after - * connect_request event delivered. - */ - break; - case IW_CM_STATE_CONN_SENT: - /* Can only get here if wait above fails */ - default: - BUG(); - } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - - if (qp) { - if (abrupt) - ret = iwcm_modify_qp_err(qp); - else - ret = iwcm_modify_qp_sqd(qp); - - /* - * If both sides are disconnecting the QP could - * already be in ERR or SQD states - */ - ret = 0; - } - - return ret; -} -EXPORT_SYMBOL(iw_cm_disconnect); - -/* - * CM_ID <-- DESTROYING - * - * Clean up all resources associated with the connection and release - * the initial reference taken by iw_create_cm_id. - */ -static void destroy_cm_id(struct iw_cm_id *cm_id) -{ - struct iwcm_id_private *cm_id_priv; - unsigned long flags; - - cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - /* - * Wait if we're currently in a connect or accept downcall. A - * listening endpoint should never block here. - */ - wait_event(cm_id_priv->connect_wait, - !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); - - /* - * Since we're deleting the cm_id, drop any events that - * might arrive before the last dereference. 
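- * The IWCM_F_DROP_EVENTS flag set below is honored by cm_work_handler(),
- * which logs and discards any still-queued event instead of dispatching
- * it to the client.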
- */
-    set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
-
-    spin_lock_irqsave(&cm_id_priv->lock, flags);
-    switch (cm_id_priv->state) {
-    case IW_CM_STATE_LISTEN:
-        cm_id_priv->state = IW_CM_STATE_DESTROYING;
-        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-        /* destroy the listening endpoint */
-        cm_id->device->iwcm->destroy_listen(cm_id);
-        spin_lock_irqsave(&cm_id_priv->lock, flags);
-        break;
-    case IW_CM_STATE_ESTABLISHED:
-        cm_id_priv->state = IW_CM_STATE_DESTROYING;
-        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-        /* Abrupt close of the connection */
-        (void)iwcm_modify_qp_err(cm_id_priv->qp);
-        spin_lock_irqsave(&cm_id_priv->lock, flags);
-        break;
-    case IW_CM_STATE_IDLE:
-    case IW_CM_STATE_CLOSING:
-        cm_id_priv->state = IW_CM_STATE_DESTROYING;
-        break;
-    case IW_CM_STATE_CONN_RECV:
-        /*
-         * App called destroy before/without calling accept after
-         * receiving connection request event notification or
-         * returned non-zero from the event callback function.
-         * In either case, must tell the provider to reject.
-         */
-        cm_id_priv->state = IW_CM_STATE_DESTROYING;
-        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-        cm_id->device->iwcm->reject(cm_id, NULL, 0);
-        spin_lock_irqsave(&cm_id_priv->lock, flags);
-        break;
-    case IW_CM_STATE_CONN_SENT:
-    case IW_CM_STATE_DESTROYING:
-    default:
-        BUG();
-        break;
-    }
-    if (cm_id_priv->qp) {
-        cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
-        cm_id_priv->qp = NULL;
-    }
-    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-
-    (void)iwcm_deref_id(cm_id_priv);
-}
-
-/*
- * This function is only called by the application thread and cannot
- * be called by the event thread. The function will wait for all
- * references to be released on the cm_id and then kfree the cm_id
- * object.
- */
-void iw_destroy_cm_id(struct iw_cm_id *cm_id)
-{
-    struct iwcm_id_private *cm_id_priv;
-
-    cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
-    destroy_cm_id(cm_id);
-}
-EXPORT_SYMBOL(iw_destroy_cm_id);
-
-/**
- * iw_cm_map - set the mapped address pair for a connection
- * @cm_id: connection manager pointer
- * @active: indicates the active side when true
- *
- * Upstream this consults the iWARP port mapper. In this tree the
- * function is a stub: the mapped addresses are plain copies of
- * local_addr/remote_addr, no mapping is attempted, and the return
- * value is always 0.
- */
-static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
-{
-    cm_id->m_local_addr = cm_id->local_addr;
-    cm_id->m_remote_addr = cm_id->remote_addr;
-
-    return 0;
-}
-
-/*
- * CM_ID <-- LISTEN
- *
- * Start listening for connect requests. Generates one CONNECT_REQUEST
- * event for each inbound connect request.
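- */
-
-/*
- * Editor's sketch (hypothetical caller; my_handler and my_ctx are not
- * from this file): a consumer creates an ID and listens with an explicit
- * backlog. Passing backlog == 0 falls back to default_backlog (256).
- *
- *	struct iw_cm_id *id;
- *
- *	id = iw_create_cm_id(device, my_handler, my_ctx);
- *	if (!IS_ERR(id) && iw_cm_listen(id, 128))
- *		iw_destroy_cm_id(id);
- */
-
-/*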
- */ -int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) -{ - struct iwcm_id_private *cm_id_priv; - unsigned long flags; - int ret; - - cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - - if (!backlog) - backlog = default_backlog; - - ret = alloc_work_entries(cm_id_priv, backlog); - if (ret) - return ret; - - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch (cm_id_priv->state) { - case IW_CM_STATE_IDLE: - cm_id_priv->state = IW_CM_STATE_LISTEN; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = iw_cm_map(cm_id, false); - if (!ret) - ret = cm_id->device->iwcm->create_listen(cm_id, backlog); - if (ret) - cm_id_priv->state = IW_CM_STATE_IDLE; - spin_lock_irqsave(&cm_id_priv->lock, flags); - break; - default: - ret = -EINVAL; - } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - - return ret; -} -EXPORT_SYMBOL(iw_cm_listen); - -/* - * CM_ID <-- IDLE - * - * Rejects an inbound connection request. No events are generated. - */ -int iw_cm_reject(struct iw_cm_id *cm_id, - const void *private_data, - u8 private_data_len) -{ - struct iwcm_id_private *cm_id_priv; - unsigned long flags; - int ret; - - cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); - - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); - wake_up_all(&cm_id_priv->connect_wait); - return -EINVAL; - } - cm_id_priv->state = IW_CM_STATE_IDLE; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - - ret = cm_id->device->iwcm->reject(cm_id, private_data, - private_data_len); - - clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); - wake_up_all(&cm_id_priv->connect_wait); - - return ret; -} -EXPORT_SYMBOL(iw_cm_reject); - -/* - * CM_ID <-- ESTABLISHED - * - * Accepts an inbound connection request and generates an ESTABLISHED - * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block - * until the ESTABLISHED event is received from the provider. 
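- */
-
-/*
- * Editor's sketch (hypothetical event handler; my_conn_req_handler and
- * my_qpn are not from this file): the typical accept path runs from the
- * CONNECT_REQUEST upcall. The qpn must name a QP on the same device;
- * iw_cm_accept() resolves it through iwcm->get_qp() and takes a
- * reference that is dropped again on close or error.
- *
- *	static int my_conn_req_handler(struct iw_cm_id *id,
- *				       struct iw_cm_event *ev)
- *	{
- *		struct iw_cm_conn_param p = {
- *			.ord = 1,
- *			.ird = 1,
- *			.qpn = my_qpn,
- *		};
- *
- *		if (ev->event == IW_CM_EVENT_CONNECT_REQUEST)
- *			return iw_cm_accept(id, &p);
- *		return 0;
- *	}
- */
-
-/*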
- */
-int iw_cm_accept(struct iw_cm_id *cm_id,
-         struct iw_cm_conn_param *iw_param)
-{
-    struct iwcm_id_private *cm_id_priv;
-    struct ib_qp *qp;
-    unsigned long flags;
-    int ret;
-
-    cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
-    set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
-
-    spin_lock_irqsave(&cm_id_priv->lock, flags);
-    if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
-        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
-        wake_up_all(&cm_id_priv->connect_wait);
-        return -EINVAL;
-    }
-    /* Get the ib_qp given the QPN */
-    qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
-    if (!qp) {
-        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
-        wake_up_all(&cm_id_priv->connect_wait);
-        return -EINVAL;
-    }
-    cm_id->device->iwcm->add_ref(qp);
-    cm_id_priv->qp = qp;
-    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-
-    ret = cm_id->device->iwcm->accept(cm_id, iw_param);
-    if (ret) {
-        /* An error on accept precludes provider events */
-        BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
-        cm_id_priv->state = IW_CM_STATE_IDLE;
-        spin_lock_irqsave(&cm_id_priv->lock, flags);
-        if (cm_id_priv->qp) {
-            cm_id->device->iwcm->rem_ref(qp);
-            cm_id_priv->qp = NULL;
-        }
-        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
-        wake_up_all(&cm_id_priv->connect_wait);
-    }
-
-    return ret;
-}
-EXPORT_SYMBOL(iw_cm_accept);
-
-/*
- * Active Side: CM_ID <-- CONN_SENT
- *
- * If successful, results in the generation of a CONNECT_REPLY
- * event. iw_cm_disconnect and iw_destroy_cm_id will block until the
- * CONNECT_REPLY event is received from the provider.
- */
-int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-    struct iwcm_id_private *cm_id_priv;
-    int ret;
-    unsigned long flags;
-    struct ib_qp *qp;
-
-    cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
-
-    ret = alloc_work_entries(cm_id_priv, 4);
-    if (ret)
-        return ret;
-
-    set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
-    spin_lock_irqsave(&cm_id_priv->lock, flags);
-
-    if (cm_id_priv->state != IW_CM_STATE_IDLE) {
-        ret = -EINVAL;
-        goto err;
-    }
-
-    /* Get the ib_qp given the QPN */
-    qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
-    if (!qp) {
-        ret = -EINVAL;
-        goto err;
-    }
-    cm_id->device->iwcm->add_ref(qp);
-    cm_id_priv->qp = qp;
-    cm_id_priv->state = IW_CM_STATE_CONN_SENT;
-    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-
-    ret = iw_cm_map(cm_id, true);
-    if (!ret)
-        ret = cm_id->device->iwcm->connect(cm_id, iw_param);
-    if (!ret)
-        return 0;	/* success */
-
-    spin_lock_irqsave(&cm_id_priv->lock, flags);
-    if (cm_id_priv->qp) {
-        cm_id->device->iwcm->rem_ref(qp);
-        cm_id_priv->qp = NULL;
-    }
-    cm_id_priv->state = IW_CM_STATE_IDLE;
-err:
-    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-    clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
-    wake_up_all(&cm_id_priv->connect_wait);
-    return ret;
-}
-EXPORT_SYMBOL(iw_cm_connect);
-
-/*
- * Passive Side: new CM_ID <-- CONN_RECV
- *
- * Handles an inbound connect request. The function creates a new
- * iw_cm_id to represent the new connection and inherits the client
- * callback function and other attributes from the listening parent.
- *
- * The work item contains a pointer to the listen_cm_id and the event. The
- * listen_cm_id contains the client cm_handler, context and
- * device. These are copied when the device is cloned.
- * The event contains the new four tuple.
- *
- * An error on the child should not affect the parent, so this
- * function does not return a value.
- */
-static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
-                struct iw_cm_event *iw_event)
-{
-    unsigned long flags;
-    struct iw_cm_id *cm_id;
-    struct iwcm_id_private *cm_id_priv;
-    int ret;
-
-    /*
-     * The provider should never generate a connection request
-     * event with a bad status.
-     */
-    BUG_ON(iw_event->status);
-
-    cm_id = iw_create_cm_id(listen_id_priv->id.device,
-                listen_id_priv->id.cm_handler,
-                listen_id_priv->id.context);
-    /* If the cm_id could not be created, ignore the request */
-    if (IS_ERR(cm_id))
-        goto out;
-
-    cm_id->provider_data = iw_event->provider_data;
-    cm_id->m_local_addr = iw_event->local_addr;
-    cm_id->m_remote_addr = iw_event->remote_addr;
-    cm_id->local_addr = listen_id_priv->id.local_addr;
-    cm_id->remote_addr = iw_event->remote_addr;
-    cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
-    cm_id_priv->state = IW_CM_STATE_CONN_RECV;
-
-    /*
-     * We could be destroying the listening id. If so, ignore this
-     * upcall.
-     */
-    spin_lock_irqsave(&listen_id_priv->lock, flags);
-    if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
-        spin_unlock_irqrestore(&listen_id_priv->lock, flags);
-        iw_cm_reject(cm_id, NULL, 0);
-        iw_destroy_cm_id(cm_id);
-        goto out;
-    }
-    spin_unlock_irqrestore(&listen_id_priv->lock, flags);
-
-    ret = alloc_work_entries(cm_id_priv, 3);
-    if (ret) {
-        iw_cm_reject(cm_id, NULL, 0);
-        iw_destroy_cm_id(cm_id);
-        goto out;
-    }
-
-    /* Call the client CM handler */
-    ret = cm_id->cm_handler(cm_id, iw_event);
-    if (ret) {
-        iw_cm_reject(cm_id, NULL, 0);
-        iw_destroy_cm_id(cm_id);
-    }
-
-out:
-    if (iw_event->private_data_len)
-        kfree(iw_event->private_data);
-}
-
-/*
- * Passive Side: CM_ID <-- ESTABLISHED
- *
- * The provider generated an ESTABLISHED event which means that
- * the MPA negotiation has completed successfully and we are now in MPA
- * FPDU mode.
- *
- * This event can only be received in the CONN_RECV state. If the
- * remote peer closed, the ESTABLISHED event would be received followed
- * by the CLOSE event. If the app closes, it will block until we wake
- * it up after processing this event.
- */
-static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
-                   struct iw_cm_event *iw_event)
-{
-    unsigned long flags;
-    int ret;
-
-    spin_lock_irqsave(&cm_id_priv->lock, flags);
-
-    /*
-     * We clear the CONNECT_WAIT bit here to allow the callback
-     * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
-     * from a callback handler is not allowed.
-     */
-    clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
-    BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
-    cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
-    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-    ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
-    wake_up_all(&cm_id_priv->connect_wait);
-
-    return ret;
-}
-
-/*
- * Active Side: CM_ID <-- ESTABLISHED
- *
- * The app has called connect and is waiting for the established event to
- * post its requests to the server. This event will wake up anyone
- * blocked in iw_cm_disconnect or iw_destroy_cm_id.
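- */
-
-/*
- * Editor's sketch (hypothetical handler; my_conn and its completions are
- * not from this file): an active-side client typically keys off
- * CONNECT_REPLY and uses ev->status to tell an accepted connection from
- * a reject or reset, since both outcomes arrive through the same event.
- *
- *	static int my_handler(struct iw_cm_id *id, struct iw_cm_event *ev)
- *	{
- *		struct my_conn *c = id->context;
- *
- *		if (ev->event == IW_CM_EVENT_CONNECT_REPLY)
- *			complete(ev->status ? &c->rejected : &c->ready);
- *		return 0;
- *	}
- */
-
-/*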
- */
-static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
-                   struct iw_cm_event *iw_event)
-{
-    unsigned long flags;
-    int ret;
-
-    spin_lock_irqsave(&cm_id_priv->lock, flags);
-    /*
-     * Clear the connect wait bit so a callback function calling
-     * iw_cm_disconnect will not wait and deadlock this thread.
-     */
-    clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
-    BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
-    if (iw_event->status == 0) {
-        cm_id_priv->id.m_local_addr = iw_event->local_addr;
-        cm_id_priv->id.m_remote_addr = iw_event->remote_addr;
-        iw_event->local_addr = cm_id_priv->id.local_addr;
-        iw_event->remote_addr = cm_id_priv->id.remote_addr;
-        cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
-    } else {
-        /* REJECTED or RESET */
-        cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
-        cm_id_priv->qp = NULL;
-        cm_id_priv->state = IW_CM_STATE_IDLE;
-    }
-    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-    ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
-
-    if (iw_event->private_data_len)
-        kfree(iw_event->private_data);
-
-    /* Wake up waiters on connect complete */
-    wake_up_all(&cm_id_priv->connect_wait);
-
-    return ret;
-}
-
-/*
- * CM_ID <-- CLOSING
- *
- * If in the ESTABLISHED state, move to CLOSING.
- */
-static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
-                  struct iw_cm_event *iw_event)
-{
-    unsigned long flags;
-
-    spin_lock_irqsave(&cm_id_priv->lock, flags);
-    if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
-        cm_id_priv->state = IW_CM_STATE_CLOSING;
-    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-}
-
-/*
- * CM_ID <-- IDLE
- *
- * If in the ESTABLISHED or CLOSING states, the QP will have been
- * moved by the provider to the ERR state. Disassociate the CM_ID from
- * the QP, move to IDLE, and remove the 'connected' reference.
- *
- * If in some other state, the cm_id was destroyed asynchronously.
- * This is the last reference that will result in waking up
- * the app thread blocked in iw_destroy_cm_id.
- */
-static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
-                struct iw_cm_event *iw_event)
-{
-    unsigned long flags;
-    int ret = 0;
-    spin_lock_irqsave(&cm_id_priv->lock, flags);
-
-    if (cm_id_priv->qp) {
-        cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
-        cm_id_priv->qp = NULL;
-    }
-    switch (cm_id_priv->state) {
-    case IW_CM_STATE_ESTABLISHED:
-    case IW_CM_STATE_CLOSING:
-        cm_id_priv->state = IW_CM_STATE_IDLE;
-        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-        ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
-        spin_lock_irqsave(&cm_id_priv->lock, flags);
-        break;
-    case IW_CM_STATE_DESTROYING:
-        break;
-    default:
-        BUG();
-    }
-    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-
-    return ret;
-}
-
-static int process_event(struct iwcm_id_private *cm_id_priv,
-             struct iw_cm_event *iw_event)
-{
-    int ret = 0;
-
-    switch (iw_event->event) {
-    case IW_CM_EVENT_CONNECT_REQUEST:
-        cm_conn_req_handler(cm_id_priv, iw_event);
-        break;
-    case IW_CM_EVENT_CONNECT_REPLY:
-        ret = cm_conn_rep_handler(cm_id_priv, iw_event);
-        break;
-    case IW_CM_EVENT_ESTABLISHED:
-        ret = cm_conn_est_handler(cm_id_priv, iw_event);
-        break;
-    case IW_CM_EVENT_DISCONNECT:
-        cm_disconnect_handler(cm_id_priv, iw_event);
-        break;
-    case IW_CM_EVENT_CLOSE:
-        ret = cm_close_handler(cm_id_priv, iw_event);
-        break;
-    default:
-        BUG();
-    }
-
-    return ret;
-}
-
-/*
- * Process events on the work_list for the cm_id.
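- * (The shape of the drain loop is sketched in the comment just below.)
- */
-
-/*
- * Editor's sketch (a generic pattern, not code lifted from this file):
- * cm_work_handler() drains its queue by popping one element under the
- * lock and processing it with the lock dropped:
- *
- *	spin_lock_irqsave(&lock, flags);
- *	while (!list_empty(&queue)) {
- *		w = list_entry(queue.next, struct iwcm_work, list);
- *		list_del_init(&w->list);
- *		spin_unlock_irqrestore(&lock, flags);
- *		handle(w);	(may sleep or re-enter the CM)
- *		spin_lock_irqsave(&lock, flags);
- *	}
- *	spin_unlock_irqrestore(&lock, flags);
- */
-
-/*
- * Destruction is deferred through reference counting.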
- * If the callback function requests that the cm_id be deleted, a flag
- * is set in the cm_id flags to indicate that when the last reference is
- * removed, the cm_id is to be destroyed. This is necessary to
- * distinguish between an object that will be destroyed by the app
- * thread asleep on the destroy_comp list vs. an object destroyed
- * here synchronously when the last reference is removed.
- */
-static void cm_work_handler(struct work_struct *_work)
-{
-    struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
-    struct iw_cm_event levent;
-    struct iwcm_id_private *cm_id_priv = work->cm_id;
-    unsigned long flags;
-    int empty;
-    int ret = 0;
-
-    spin_lock_irqsave(&cm_id_priv->lock, flags);
-    empty = list_empty(&cm_id_priv->work_list);
-    while (!empty) {
-        work = list_entry(cm_id_priv->work_list.next,
-                  struct iwcm_work, list);
-        list_del_init(&work->list);
-        empty = list_empty(&cm_id_priv->work_list);
-        levent = work->event;
-        put_work(work);
-        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-
-        if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
-            ret = process_event(cm_id_priv, &levent);
-            if (ret)
-                destroy_cm_id(&cm_id_priv->id);
-        } else
-            pr_debug("dropping event %d\n", levent.event);
-        if (iwcm_deref_id(cm_id_priv))
-            return;
-        if (empty)
-            return;
-        spin_lock_irqsave(&cm_id_priv->lock, flags);
-    }
-    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-}
-
-/*
- * This function is called in interrupt context. Schedule events on
- * the iwcm_wq thread to allow callback functions to downcall into
- * the CM and/or block. Events are queued to a per-CM_ID
- * work_list. If this is the first event on the work_list, the work
- * element is also queued on the iwcm_wq thread.
- *
- * Each event holds a reference on the cm_id. Until the last posted
- * event has been delivered and processed, the cm_id cannot be
- * deleted.
- *
- * Returns:
- * 0 - the event was handled.
- * -ENOMEM - the event was not handled due to lack of resources.
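- */
-
-/*
- * Editor's sketch (hypothetical provider upcall; my_hw_reject and
- * provider_conn are not from this file): a driver delivers events
- * through the handler stored in the cm_id, and on -ENOMEM for a
- * connection request it must reject the peer itself, per the note at
- * the top of this file.
- *
- *	ret = cm_id->event_handler(cm_id, &event);
- *	if (ret == -ENOMEM && event.event == IW_CM_EVENT_CONNECT_REQUEST)
- *		my_hw_reject(provider_conn);
- */
-
-/*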
- */ -static int cm_event_handler(struct iw_cm_id *cm_id, - struct iw_cm_event *iw_event) -{ - struct iwcm_work *work; - struct iwcm_id_private *cm_id_priv; - unsigned long flags; - int ret = 0; - - cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - - spin_lock_irqsave(&cm_id_priv->lock, flags); - work = get_work(cm_id_priv); - if (!work) { - ret = -ENOMEM; - goto out; - } - - INIT_WORK(&work->work, cm_work_handler); - work->cm_id = cm_id_priv; - work->event = *iw_event; - - if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || - work->event.event == IW_CM_EVENT_CONNECT_REPLY) && - work->event.private_data_len) { - ret = copy_private_data(&work->event); - if (ret) { - put_work(work); - goto out; - } - } - - atomic_inc(&cm_id_priv->refcount); - if (list_empty(&cm_id_priv->work_list)) { - list_add_tail(&work->list, &cm_id_priv->work_list); - queue_work(iwcm_wq, &work->work); - } else - list_add_tail(&work->list, &cm_id_priv->work_list); -out: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} - -static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, - struct ib_qp_attr *qp_attr, - int *qp_attr_mask) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch (cm_id_priv->state) { - case IW_CM_STATE_IDLE: - case IW_CM_STATE_CONN_SENT: - case IW_CM_STATE_CONN_RECV: - case IW_CM_STATE_ESTABLISHED: - *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; - qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| - IB_ACCESS_REMOTE_READ; - ret = 0; - break; - default: - ret = -EINVAL; - break; - } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} - -static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, - struct ib_qp_attr *qp_attr, - int *qp_attr_mask) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch (cm_id_priv->state) { - case IW_CM_STATE_IDLE: - case IW_CM_STATE_CONN_SENT: - case IW_CM_STATE_CONN_RECV: - case IW_CM_STATE_ESTABLISHED: - *qp_attr_mask = 0; - ret = 0; - break; - default: - ret = -EINVAL; - break; - } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} - -int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, - struct ib_qp_attr *qp_attr, - int *qp_attr_mask) -{ - struct iwcm_id_private *cm_id_priv; - int ret; - - cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - switch (qp_attr->qp_state) { - case IB_QPS_INIT: - case IB_QPS_RTR: - ret = iwcm_init_qp_init_attr(cm_id_priv, - qp_attr, qp_attr_mask); - break; - case IB_QPS_RTS: - ret = iwcm_init_qp_rts_attr(cm_id_priv, - qp_attr, qp_attr_mask); - break; - default: - ret = -EINVAL; - break; - } - return ret; -} -EXPORT_SYMBOL(iw_cm_init_qp_attr); - -static int __init iw_cm_init(void) -{ - iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); - if (!iwcm_wq) - return -ENOMEM; - - return 0; -} - -static void __exit iw_cm_cleanup(void) -{ - destroy_workqueue(iwcm_wq); -} - -module_init(iw_cm_init); -module_exit(iw_cm_cleanup); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/iwcm.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs_marshall.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs_marshall.c (revision 320591) +++ 
projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs_marshall.c (nonexistent) @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include - -void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, - struct ib_ah_attr *src) -{ - memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid); - dst->grh.flow_label = src->grh.flow_label; - dst->grh.sgid_index = src->grh.sgid_index; - dst->grh.hop_limit = src->grh.hop_limit; - dst->grh.traffic_class = src->grh.traffic_class; - memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); - dst->dlid = src->dlid; - dst->sl = src->sl; - dst->src_path_bits = src->src_path_bits; - dst->static_rate = src->static_rate; - dst->is_global = src->ah_flags & IB_AH_GRH ? 
1 : 0; - dst->port_num = src->port_num; - dst->reserved = 0; -} -EXPORT_SYMBOL(ib_copy_ah_attr_to_user); - -void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, - struct ib_qp_attr *src) -{ - dst->qp_state = src->qp_state; - dst->cur_qp_state = src->cur_qp_state; - dst->path_mtu = src->path_mtu; - dst->path_mig_state = src->path_mig_state; - dst->qkey = src->qkey; - dst->rq_psn = src->rq_psn; - dst->sq_psn = src->sq_psn; - dst->dest_qp_num = src->dest_qp_num; - dst->qp_access_flags = src->qp_access_flags; - - dst->max_send_wr = src->cap.max_send_wr; - dst->max_recv_wr = src->cap.max_recv_wr; - dst->max_send_sge = src->cap.max_send_sge; - dst->max_recv_sge = src->cap.max_recv_sge; - dst->max_inline_data = src->cap.max_inline_data; - - ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); - ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr); - - dst->pkey_index = src->pkey_index; - dst->alt_pkey_index = src->alt_pkey_index; - dst->en_sqd_async_notify = src->en_sqd_async_notify; - dst->sq_draining = src->sq_draining; - dst->max_rd_atomic = src->max_rd_atomic; - dst->max_dest_rd_atomic = src->max_dest_rd_atomic; - dst->min_rnr_timer = src->min_rnr_timer; - dst->port_num = src->port_num; - dst->timeout = src->timeout; - dst->retry_cnt = src->retry_cnt; - dst->rnr_retry = src->rnr_retry; - dst->alt_port_num = src->alt_port_num; - dst->alt_timeout = src->alt_timeout; - memset(dst->reserved, 0, sizeof(dst->reserved)); -} -EXPORT_SYMBOL(ib_copy_qp_attr_to_user); - -void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, - struct ib_sa_path_rec *src) -{ - memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid); - memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid); - - dst->dlid = src->dlid; - dst->slid = src->slid; - dst->raw_traffic = src->raw_traffic; - dst->flow_label = src->flow_label; - dst->hop_limit = src->hop_limit; - dst->traffic_class = src->traffic_class; - dst->reversible = src->reversible; - dst->numb_path = src->numb_path; - dst->pkey = src->pkey; - dst->sl = src->sl; - dst->mtu_selector = src->mtu_selector; - dst->mtu = src->mtu; - dst->rate_selector = src->rate_selector; - dst->rate = src->rate; - dst->packet_life_time = src->packet_life_time; - dst->preference = src->preference; - dst->packet_life_time_selector = src->packet_life_time_selector; -} -EXPORT_SYMBOL(ib_copy_path_rec_to_user); - -void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, - struct ib_user_path_rec *src) -{ - memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid); - memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid); - - dst->dlid = src->dlid; - dst->slid = src->slid; - dst->raw_traffic = src->raw_traffic; - dst->flow_label = src->flow_label; - dst->hop_limit = src->hop_limit; - dst->traffic_class = src->traffic_class; - dst->reversible = src->reversible; - dst->numb_path = src->numb_path; - dst->pkey = src->pkey; - dst->sl = src->sl; - dst->mtu_selector = src->mtu_selector; - dst->mtu = src->mtu; - dst->rate_selector = src->rate_selector; - dst->rate = src->rate; - dst->packet_life_time = src->packet_life_time; - dst->preference = src->preference; - dst->packet_life_time_selector = src->packet_life_time_selector; - - memset(dst->dmac, 0, sizeof(dst->dmac)); - dst->net = NULL; - dst->ifindex = 0; - dst->gid_type = IB_GID_TYPE_IB; -} -EXPORT_SYMBOL(ib_copy_path_rec_from_user); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs_marshall.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline 
at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ucm.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ucm.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ucm.c (nonexistent) @@ -1,1371 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -MODULE_AUTHOR("Libor Michalek"); -MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); -MODULE_LICENSE("Dual BSD/GPL"); - -struct ib_ucm_device { - int devnum; - struct cdev cdev; - struct device dev; - struct ib_device *ib_dev; -}; - -struct ib_ucm_file { - struct mutex file_mutex; - struct file *filp; - struct ib_ucm_device *device; - - struct list_head ctxs; - struct list_head events; - wait_queue_head_t poll_wait; -}; - -struct ib_ucm_context { - int id; - struct completion comp; - atomic_t ref; - int events_reported; - - struct ib_ucm_file *file; - struct ib_cm_id *cm_id; - __u64 uid; - - struct list_head events; /* list of pending events. 
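-				 * Guarded by the owning file's file_mutex.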
*/ - struct list_head file_list; /* member in file ctx list */ -}; - -struct ib_ucm_event { - struct ib_ucm_context *ctx; - struct list_head file_list; /* member in file event list */ - struct list_head ctx_list; /* member in ctx event list */ - - struct ib_cm_id *cm_id; - struct ib_ucm_event_resp resp; - void *data; - void *info; - int data_len; - int info_len; -}; - -enum { - IB_UCM_MAJOR = 231, - IB_UCM_BASE_MINOR = 224, - IB_UCM_MAX_DEVICES = 32 -}; - -#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) - -static void ib_ucm_add_one(struct ib_device *device); -static void ib_ucm_remove_one(struct ib_device *device, void *client_data); - -static struct ib_client ucm_client = { - .name = "ucm", - .add = ib_ucm_add_one, - .remove = ib_ucm_remove_one -}; - -static DEFINE_MUTEX(ctx_id_mutex); -static DEFINE_IDR(ctx_id_table); -static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); - -static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) -{ - struct ib_ucm_context *ctx; - - mutex_lock(&ctx_id_mutex); - ctx = idr_find(&ctx_id_table, id); - if (!ctx) - ctx = ERR_PTR(-ENOENT); - else if (ctx->file != file) - ctx = ERR_PTR(-EINVAL); - else - atomic_inc(&ctx->ref); - mutex_unlock(&ctx_id_mutex); - - return ctx; -} - -static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) -{ - if (atomic_dec_and_test(&ctx->ref)) - complete(&ctx->comp); -} - -static inline int ib_ucm_new_cm_id(int event) -{ - return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED; -} - -static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) -{ - struct ib_ucm_event *uevent; - - mutex_lock(&ctx->file->file_mutex); - list_del(&ctx->file_list); - while (!list_empty(&ctx->events)) { - - uevent = list_entry(ctx->events.next, - struct ib_ucm_event, ctx_list); - list_del(&uevent->file_list); - list_del(&uevent->ctx_list); - mutex_unlock(&ctx->file->file_mutex); - - /* clear incoming connections. 
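-		 * An unreported REQ/SIDR_REQ event still owns the brand-new
-		 * cm_id created for it; destroy that cm_id here so it is
-		 * not leaked.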
*/ - if (ib_ucm_new_cm_id(uevent->resp.event)) - ib_destroy_cm_id(uevent->cm_id); - - kfree(uevent); - mutex_lock(&ctx->file->file_mutex); - } - mutex_unlock(&ctx->file->file_mutex); -} - -static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) -{ - struct ib_ucm_context *ctx; - - ctx = kzalloc(sizeof *ctx, GFP_KERNEL); - if (!ctx) - return NULL; - - atomic_set(&ctx->ref, 1); - init_completion(&ctx->comp); - ctx->file = file; - INIT_LIST_HEAD(&ctx->events); - - mutex_lock(&ctx_id_mutex); - ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL); - mutex_unlock(&ctx_id_mutex); - if (ctx->id < 0) - goto error; - - list_add_tail(&ctx->file_list, &file->ctxs); - return ctx; - -error: - kfree(ctx); - return NULL; -} - -static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, - struct ib_cm_req_event_param *kreq) -{ - ureq->remote_ca_guid = kreq->remote_ca_guid; - ureq->remote_qkey = kreq->remote_qkey; - ureq->remote_qpn = kreq->remote_qpn; - ureq->qp_type = kreq->qp_type; - ureq->starting_psn = kreq->starting_psn; - ureq->responder_resources = kreq->responder_resources; - ureq->initiator_depth = kreq->initiator_depth; - ureq->local_cm_response_timeout = kreq->local_cm_response_timeout; - ureq->flow_control = kreq->flow_control; - ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout; - ureq->retry_count = kreq->retry_count; - ureq->rnr_retry_count = kreq->rnr_retry_count; - ureq->srq = kreq->srq; - ureq->port = kreq->port; - - ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path); - if (kreq->alternate_path) - ib_copy_path_rec_to_user(&ureq->alternate_path, - kreq->alternate_path); -} - -static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, - struct ib_cm_rep_event_param *krep) -{ - urep->remote_ca_guid = krep->remote_ca_guid; - urep->remote_qkey = krep->remote_qkey; - urep->remote_qpn = krep->remote_qpn; - urep->starting_psn = krep->starting_psn; - urep->responder_resources = krep->responder_resources; - urep->initiator_depth = krep->initiator_depth; - urep->target_ack_delay = krep->target_ack_delay; - urep->failover_accepted = krep->failover_accepted; - urep->flow_control = krep->flow_control; - urep->rnr_retry_count = krep->rnr_retry_count; - urep->srq = krep->srq; -} - -static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, - struct ib_cm_sidr_rep_event_param *krep) -{ - urep->status = krep->status; - urep->qkey = krep->qkey; - urep->qpn = krep->qpn; -}; - -static int ib_ucm_event_process(struct ib_cm_event *evt, - struct ib_ucm_event *uvt) -{ - void *info = NULL; - - switch (evt->event) { - case IB_CM_REQ_RECEIVED: - ib_ucm_event_req_get(&uvt->resp.u.req_resp, - &evt->param.req_rcvd); - uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE; - uvt->resp.present = IB_UCM_PRES_PRIMARY; - uvt->resp.present |= (evt->param.req_rcvd.alternate_path ? 
- IB_UCM_PRES_ALTERNATE : 0); - break; - case IB_CM_REP_RECEIVED: - ib_ucm_event_rep_get(&uvt->resp.u.rep_resp, - &evt->param.rep_rcvd); - uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE; - break; - case IB_CM_RTU_RECEIVED: - uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE; - uvt->resp.u.send_status = evt->param.send_status; - break; - case IB_CM_DREQ_RECEIVED: - uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE; - uvt->resp.u.send_status = evt->param.send_status; - break; - case IB_CM_DREP_RECEIVED: - uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE; - uvt->resp.u.send_status = evt->param.send_status; - break; - case IB_CM_MRA_RECEIVED: - uvt->resp.u.mra_resp.timeout = - evt->param.mra_rcvd.service_timeout; - uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE; - break; - case IB_CM_REJ_RECEIVED: - uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason; - uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; - uvt->info_len = evt->param.rej_rcvd.ari_length; - info = evt->param.rej_rcvd.ari; - break; - case IB_CM_LAP_RECEIVED: - ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path, - evt->param.lap_rcvd.alternate_path); - uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; - uvt->resp.present = IB_UCM_PRES_ALTERNATE; - break; - case IB_CM_APR_RECEIVED: - uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status; - uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE; - uvt->info_len = evt->param.apr_rcvd.info_len; - info = evt->param.apr_rcvd.apr_info; - break; - case IB_CM_SIDR_REQ_RECEIVED: - uvt->resp.u.sidr_req_resp.pkey = - evt->param.sidr_req_rcvd.pkey; - uvt->resp.u.sidr_req_resp.port = - evt->param.sidr_req_rcvd.port; - uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; - break; - case IB_CM_SIDR_REP_RECEIVED: - ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp, - &evt->param.sidr_rep_rcvd); - uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; - uvt->info_len = evt->param.sidr_rep_rcvd.info_len; - info = evt->param.sidr_rep_rcvd.info; - break; - default: - uvt->resp.u.send_status = evt->param.send_status; - break; - } - - if (uvt->data_len) { - uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL); - if (!uvt->data) - goto err1; - - uvt->resp.present |= IB_UCM_PRES_DATA; - } - - if (uvt->info_len) { - uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL); - if (!uvt->info) - goto err2; - - uvt->resp.present |= IB_UCM_PRES_INFO; - } - return 0; - -err2: - kfree(uvt->data); -err1: - return -ENOMEM; -} - -static int ib_ucm_event_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event) -{ - struct ib_ucm_event *uevent; - struct ib_ucm_context *ctx; - int result = 0; - - ctx = cm_id->context; - - uevent = kzalloc(sizeof *uevent, GFP_KERNEL); - if (!uevent) - goto err1; - - uevent->ctx = ctx; - uevent->cm_id = cm_id; - uevent->resp.uid = ctx->uid; - uevent->resp.id = ctx->id; - uevent->resp.event = event->event; - - result = ib_ucm_event_process(event, uevent); - if (result) - goto err2; - - mutex_lock(&ctx->file->file_mutex); - list_add_tail(&uevent->file_list, &ctx->file->events); - list_add_tail(&uevent->ctx_list, &ctx->events); - wake_up_interruptible(&ctx->file->poll_wait); - linux_poll_wakeup(ctx->file->filp); - mutex_unlock(&ctx->file->file_mutex); - return 0; - -err2: - kfree(uevent); -err1: - /* Destroy new cm_id's */ - return ib_ucm_new_cm_id(event->event); -} - -static ssize_t ib_ucm_event(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_context *ctx; - struct ib_ucm_event_get cmd; - struct ib_ucm_event *uevent; - int result = 0; - - if (out_len < 
sizeof(struct ib_ucm_event_resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - mutex_lock(&file->file_mutex); - while (list_empty(&file->events)) { - mutex_unlock(&file->file_mutex); - - if (file->filp->f_flags & O_NONBLOCK) - return -EAGAIN; - - if (wait_event_interruptible(file->poll_wait, - !list_empty(&file->events))) - return -ERESTARTSYS; - - mutex_lock(&file->file_mutex); - } - - uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); - - if (ib_ucm_new_cm_id(uevent->resp.event)) { - ctx = ib_ucm_ctx_alloc(file); - if (!ctx) { - result = -ENOMEM; - goto done; - } - - ctx->cm_id = uevent->cm_id; - ctx->cm_id->context = ctx; - uevent->resp.id = ctx->id; - } - - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &uevent->resp, sizeof(uevent->resp))) { - result = -EFAULT; - goto done; - } - - if (uevent->data) { - if (cmd.data_len < uevent->data_len) { - result = -ENOMEM; - goto done; - } - if (copy_to_user((void __user *)(unsigned long)cmd.data, - uevent->data, uevent->data_len)) { - result = -EFAULT; - goto done; - } - } - - if (uevent->info) { - if (cmd.info_len < uevent->info_len) { - result = -ENOMEM; - goto done; - } - if (copy_to_user((void __user *)(unsigned long)cmd.info, - uevent->info, uevent->info_len)) { - result = -EFAULT; - goto done; - } - } - - list_del(&uevent->file_list); - list_del(&uevent->ctx_list); - uevent->ctx->events_reported++; - - kfree(uevent->data); - kfree(uevent->info); - kfree(uevent); -done: - mutex_unlock(&file->file_mutex); - return result; -} - -static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_create_id cmd; - struct ib_ucm_create_id_resp resp; - struct ib_ucm_context *ctx; - int result; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - mutex_lock(&file->file_mutex); - ctx = ib_ucm_ctx_alloc(file); - mutex_unlock(&file->file_mutex); - if (!ctx) - return -ENOMEM; - - ctx->uid = cmd.uid; - ctx->cm_id = ib_create_cm_id(file->device->ib_dev, - ib_ucm_event_handler, ctx); - if (IS_ERR(ctx->cm_id)) { - result = PTR_ERR(ctx->cm_id); - goto err1; - } - - resp.id = ctx->id; - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) { - result = -EFAULT; - goto err2; - } - return 0; - -err2: - ib_destroy_cm_id(ctx->cm_id); -err1: - mutex_lock(&ctx_id_mutex); - idr_remove(&ctx_id_table, ctx->id); - mutex_unlock(&ctx_id_mutex); - kfree(ctx); - return result; -} - -static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_destroy_id cmd; - struct ib_ucm_destroy_id_resp resp; - struct ib_ucm_context *ctx; - int result = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - mutex_lock(&ctx_id_mutex); - ctx = idr_find(&ctx_id_table, cmd.id); - if (!ctx) - ctx = ERR_PTR(-ENOENT); - else if (ctx->file != file) - ctx = ERR_PTR(-EINVAL); - else - idr_remove(&ctx_id_table, ctx->id); - mutex_unlock(&ctx_id_mutex); - - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ib_ucm_ctx_put(ctx); - wait_for_completion(&ctx->comp); - - /* No new events will be generated after destroying the cm_id. */ - ib_destroy_cm_id(ctx->cm_id); - /* Cleanup events not yet reported to the user. 
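-	 * ib_destroy_cm_id() above guarantees the event handler can no
-	 * longer run, so this walk cannot race with new insertions.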
*/ - ib_ucm_cleanup_events(ctx); - - resp.events_reported = ctx->events_reported; - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) - result = -EFAULT; - - kfree(ctx); - return result; -} - -static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_attr_id_resp resp; - struct ib_ucm_attr_id cmd; - struct ib_ucm_context *ctx; - int result = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - resp.service_id = ctx->cm_id->service_id; - resp.service_mask = ctx->cm_id->service_mask; - resp.local_id = ctx->cm_id->local_id; - resp.remote_id = ctx->cm_id->remote_id; - - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) - result = -EFAULT; - - ib_ucm_ctx_put(ctx); - return result; -} - -static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_uverbs_qp_attr resp; - struct ib_ucm_init_qp_attr cmd; - struct ib_ucm_context *ctx; - struct ib_qp_attr qp_attr; - int result = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - resp.qp_attr_mask = 0; - memset(&qp_attr, 0, sizeof qp_attr); - qp_attr.qp_state = cmd.qp_state; - result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); - if (result) - goto out; - - ib_copy_qp_attr_to_user(&resp, &qp_attr); - - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) - result = -EFAULT; - -out: - ib_ucm_ctx_put(ctx); - return result; -} - -static int ucm_validate_listen(__be64 service_id, __be64 service_mask) -{ - service_id &= service_mask; - - if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) || - ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID)) - return -EINVAL; - - return 0; -} - -static ssize_t ib_ucm_listen(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_listen cmd; - struct ib_ucm_context *ctx; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - result = ucm_validate_listen(cmd.service_id, cmd.service_mask); - if (result) - goto out; - - result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask); -out: - ib_ucm_ctx_put(ctx); - return result; -} - -static ssize_t ib_ucm_notify(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_notify cmd; - struct ib_ucm_context *ctx; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event); - ib_ucm_ctx_put(ctx); - return result; -} - -static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) -{ - void *data; - - *dest = NULL; - - if (!len) - return 0; - - data = memdup_user((void __user *)(unsigned long)src, len); - if (IS_ERR(data)) - return PTR_ERR(data); - - *dest = data; - return 0; -} - -static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) -{ - struct ib_user_path_rec upath; - struct ib_sa_path_rec *sa_path; - - 
*path = NULL; - - if (!src) - return 0; - - sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL); - if (!sa_path) - return -ENOMEM; - - if (copy_from_user(&upath, (void __user *)(unsigned long)src, - sizeof(upath))) { - - kfree(sa_path); - return -EFAULT; - } - - ib_copy_path_rec_from_user(sa_path, &upath); - *path = sa_path; - return 0; -} - -static ssize_t ib_ucm_send_req(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_cm_req_param param; - struct ib_ucm_context *ctx; - struct ib_ucm_req cmd; - int result; - - param.private_data = NULL; - param.primary_path = NULL; - param.alternate_path = NULL; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); - if (result) - goto done; - - result = ib_ucm_path_get(¶m.primary_path, cmd.primary_path); - if (result) - goto done; - - result = ib_ucm_path_get(¶m.alternate_path, cmd.alternate_path); - if (result) - goto done; - - param.private_data_len = cmd.len; - param.service_id = cmd.sid; - param.qp_num = cmd.qpn; - param.qp_type = cmd.qp_type; - param.starting_psn = cmd.psn; - param.peer_to_peer = cmd.peer_to_peer; - param.responder_resources = cmd.responder_resources; - param.initiator_depth = cmd.initiator_depth; - param.remote_cm_response_timeout = cmd.remote_cm_response_timeout; - param.flow_control = cmd.flow_control; - param.local_cm_response_timeout = cmd.local_cm_response_timeout; - param.retry_count = cmd.retry_count; - param.rnr_retry_count = cmd.rnr_retry_count; - param.max_cm_retries = cmd.max_cm_retries; - param.srq = cmd.srq; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_req(ctx->cm_id, ¶m); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(param.private_data); - kfree(param.primary_path); - kfree(param.alternate_path); - return result; -} - -static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_cm_rep_param param; - struct ib_ucm_context *ctx; - struct ib_ucm_rep cmd; - int result; - - param.private_data = NULL; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); - if (result) - return result; - - param.qp_num = cmd.qpn; - param.starting_psn = cmd.psn; - param.private_data_len = cmd.len; - param.responder_resources = cmd.responder_resources; - param.initiator_depth = cmd.initiator_depth; - param.failover_accepted = cmd.failover_accepted; - param.flow_control = cmd.flow_control; - param.rnr_retry_count = cmd.rnr_retry_count; - param.srq = cmd.srq; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - ctx->uid = cmd.uid; - result = ib_send_cm_rep(ctx->cm_id, ¶m); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - - kfree(param.private_data); - return result; -} - -static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file, - const char __user *inbuf, int in_len, - int (*func)(struct ib_cm_id *cm_id, - const void *private_data, - u8 private_data_len)) -{ - struct ib_ucm_private_data cmd; - struct ib_ucm_context *ctx; - const void *private_data = NULL; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len); - if (result) - return result; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = func(ctx->cm_id, private_data, cmd.len); - ib_ucm_ctx_put(ctx); - } else - 
result = PTR_ERR(ctx); - - kfree(private_data); - return result; -} - -static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu); -} - -static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq); -} - -static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep); -} - -static ssize_t ib_ucm_send_info(struct ib_ucm_file *file, - const char __user *inbuf, int in_len, - int (*func)(struct ib_cm_id *cm_id, - int status, - const void *info, - u8 info_len, - const void *data, - u8 data_len)) -{ - struct ib_ucm_context *ctx; - struct ib_ucm_info cmd; - const void *data = NULL; - const void *info = NULL; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len); - if (result) - goto done; - - result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len); - if (result) - goto done; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = func(ctx->cm_id, cmd.status, info, cmd.info_len, - data, cmd.data_len); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(data); - kfree(info); - return result; -} - -static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej); -} - -static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr); -} - -static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_context *ctx; - struct ib_ucm_mra cmd; - const void *data = NULL; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); - if (result) - return result; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - - kfree(data); - return result; -} - -static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_context *ctx; - struct ib_sa_path_rec *path = NULL; - struct ib_ucm_lap cmd; - const void *data = NULL; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); - if (result) - goto done; - - result = ib_ucm_path_get(&path, cmd.path); - if (result) - goto done; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(data); - kfree(path); - return result; -} - -static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_cm_sidr_req_param param; - struct ib_ucm_context *ctx; - struct ib_ucm_sidr_req cmd; - int result; - - param.private_data = NULL; - param.path = NULL; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return 
-EFAULT; - - result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); - if (result) - goto done; - - result = ib_ucm_path_get(¶m.path, cmd.path); - if (result) - goto done; - - param.private_data_len = cmd.len; - param.service_id = cmd.sid; - param.timeout_ms = cmd.timeout; - param.max_cm_retries = cmd.max_cm_retries; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_sidr_req(ctx->cm_id, ¶m); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(param.private_data); - kfree(param.path); - return result; -} - -static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_cm_sidr_rep_param param; - struct ib_ucm_sidr_rep cmd; - struct ib_ucm_context *ctx; - int result; - - param.info = NULL; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(¶m.private_data, - cmd.data, cmd.data_len); - if (result) - goto done; - - result = ib_ucm_alloc_data(¶m.info, cmd.info, cmd.info_len); - if (result) - goto done; - - param.qp_num = cmd.qpn; - param.qkey = cmd.qkey; - param.status = cmd.status; - param.info_length = cmd.info_len; - param.private_data_len = cmd.data_len; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_sidr_rep(ctx->cm_id, ¶m); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(param.private_data); - kfree(param.info); - return result; -} - -static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) = { - [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id, - [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id, - [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id, - [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen, - [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify, - [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req, - [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep, - [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu, - [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq, - [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep, - [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej, - [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra, - [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap, - [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr, - [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req, - [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep, - [IB_USER_CM_CMD_EVENT] = ib_ucm_event, - [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr, -}; - -static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, - size_t len, loff_t *pos) -{ - struct ib_ucm_file *file = filp->private_data; - struct ib_ucm_cmd_hdr hdr; - ssize_t result; - - if (WARN_ON_ONCE(!ib_safe_file_access(filp))) - return -EACCES; - - if (len < sizeof(hdr)) - return -EINVAL; - - if (copy_from_user(&hdr, buf, sizeof(hdr))) - return -EFAULT; - - if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) - return -EINVAL; - - if (hdr.in + sizeof(hdr) > len) - return -EINVAL; - - result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr), - hdr.in, hdr.out); - if (!result) - result = len; - - return result; -} - -static unsigned int ib_ucm_poll(struct file *filp, - struct poll_table_struct *wait) -{ - struct ib_ucm_file *file = filp->private_data; - unsigned int mask = 0; - - poll_wait(filp, &file->poll_wait, wait); - - if (!list_empty(&file->events)) - mask = POLLIN | POLLRDNORM; - - return mask; -} - -/* - * ib_ucm_open() does not need the BKL: - * - * - no global state is referred to; - * - there is no ioctl method to race against; 
- * - no further module initialization is required for open to work - * after the device is registered. - */ -static int ib_ucm_open(struct inode *inode, struct file *filp) -{ - struct ib_ucm_file *file; - - file = kmalloc(sizeof(*file), GFP_KERNEL); - if (!file) - return -ENOMEM; - - INIT_LIST_HEAD(&file->events); - INIT_LIST_HEAD(&file->ctxs); - init_waitqueue_head(&file->poll_wait); - - mutex_init(&file->file_mutex); - - filp->private_data = file; - file->filp = filp; - file->device = container_of(inode->i_cdev->si_drv1, struct ib_ucm_device, cdev); - - return nonseekable_open(inode, filp); -} - -static int ib_ucm_close(struct inode *inode, struct file *filp) -{ - struct ib_ucm_file *file = filp->private_data; - struct ib_ucm_context *ctx; - - mutex_lock(&file->file_mutex); - while (!list_empty(&file->ctxs)) { - ctx = list_entry(file->ctxs.next, - struct ib_ucm_context, file_list); - mutex_unlock(&file->file_mutex); - - mutex_lock(&ctx_id_mutex); - idr_remove(&ctx_id_table, ctx->id); - mutex_unlock(&ctx_id_mutex); - - ib_destroy_cm_id(ctx->cm_id); - ib_ucm_cleanup_events(ctx); - kfree(ctx); - - mutex_lock(&file->file_mutex); - } - mutex_unlock(&file->file_mutex); - kfree(file); - return 0; -} - -static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); -static void ib_ucm_release_dev(struct device *dev) -{ - struct ib_ucm_device *ucm_dev; - - ucm_dev = container_of(dev, struct ib_ucm_device, dev); - cdev_del(&ucm_dev->cdev); - if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) - clear_bit(ucm_dev->devnum, dev_map); - else - clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map); - kfree(ucm_dev); -} - -static const struct file_operations ucm_fops = { - .owner = THIS_MODULE, - .open = ib_ucm_open, - .release = ib_ucm_close, - .write = ib_ucm_write, - .poll = ib_ucm_poll, - .llseek = no_llseek, -}; - -static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct ib_ucm_device *ucm_dev; - - ucm_dev = container_of(dev, struct ib_ucm_device, dev); - return sprintf(buf, "%s\n", ucm_dev->ib_dev->name); -} -static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); - -static dev_t overflow_maj; -static int find_overflow_devnum(void) -{ - int ret; - - if (!overflow_maj) { - ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES, - "infiniband_cm"); - if (ret) { - pr_err("ucm: couldn't register dynamic device number\n"); - return ret; - } - } - - ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES); - if (ret >= IB_UCM_MAX_DEVICES) - return -1; - - return ret; -} - -static void ib_ucm_add_one(struct ib_device *device) -{ - int devnum; - dev_t base; - struct ib_ucm_device *ucm_dev; - - if (!device->alloc_ucontext || !rdma_cap_ib_cm(device, 1)) - return; - - ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); - if (!ucm_dev) - return; - - ucm_dev->ib_dev = device; - - devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); - if (devnum >= IB_UCM_MAX_DEVICES) { - devnum = find_overflow_devnum(); - if (devnum < 0) - goto err; - - ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES; - base = devnum + overflow_maj; - set_bit(devnum, overflow_map); - } else { - ucm_dev->devnum = devnum; - base = devnum + IB_UCM_BASE_DEV; - set_bit(devnum, dev_map); - } - - cdev_init(&ucm_dev->cdev, &ucm_fops); - ucm_dev->cdev.owner = THIS_MODULE; - kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); - if (cdev_add(&ucm_dev->cdev, base, 1)) - goto err; - - ucm_dev->dev.class = &cm_class; - ucm_dev->dev.parent = device->dma_device; - ucm_dev->dev.devt = 
ucm_dev->cdev.dev; - ucm_dev->dev.release = ib_ucm_release_dev; - dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum); - if (device_register(&ucm_dev->dev)) - goto err_cdev; - - if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev)) - goto err_dev; - - ib_set_client_data(device, &ucm_client, ucm_dev); - return; - -err_dev: - device_unregister(&ucm_dev->dev); -err_cdev: - cdev_del(&ucm_dev->cdev); - if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) - clear_bit(devnum, dev_map); - else - clear_bit(devnum, overflow_map); -err: - kfree(ucm_dev); - return; -} - -static void ib_ucm_remove_one(struct ib_device *device, void *client_data) -{ - struct ib_ucm_device *ucm_dev = client_data; - - if (!ucm_dev) - return; - - device_unregister(&ucm_dev->dev); -} - -static CLASS_ATTR_STRING(abi_version, S_IRUGO, - __stringify(IB_USER_CM_ABI_VERSION)); - -static int __init ib_ucm_init(void) -{ - int ret; - - ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, - "infiniband_cm"); - if (ret) { - pr_err("ucm: couldn't register device number\n"); - goto error1; - } - - ret = class_create_file(&cm_class, &class_attr_abi_version.attr); - if (ret) { - pr_err("ucm: couldn't create abi_version attribute\n"); - goto error2; - } - - ret = ib_register_client(&ucm_client); - if (ret) { - pr_err("ucm: couldn't register client\n"); - goto error3; - } - return 0; - -error3: - class_remove_file(&cm_class, &class_attr_abi_version.attr); -error2: - unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); -error1: - return ret; -} - -static void __exit ib_ucm_cleanup(void) -{ - ib_unregister_client(&ucm_client); - class_remove_file(&cm_class, &class_attr_abi_version.attr); - unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); - if (overflow_maj) - unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES); - idr_destroy(&ctx_id_table); -} - -module_init_order(ib_ucm_init, SI_ORDER_THIRD); -module_exit(ib_ucm_cleanup); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ucm.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs_main.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs_main.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs_main.c (nonexistent) @@ -1,1392 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2005 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include "uverbs.h" - -MODULE_AUTHOR("Roland Dreier"); -MODULE_DESCRIPTION("InfiniBand userspace verbs access"); -MODULE_LICENSE("Dual BSD/GPL"); - -enum { - IB_UVERBS_MAJOR = 231, - IB_UVERBS_BASE_MINOR = 192, - IB_UVERBS_MAX_DEVICES = 32 -}; - -#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) - -static struct class *uverbs_class; - -DEFINE_SPINLOCK(ib_uverbs_idr_lock); -DEFINE_IDR(ib_uverbs_pd_idr); -DEFINE_IDR(ib_uverbs_mr_idr); -DEFINE_IDR(ib_uverbs_mw_idr); -DEFINE_IDR(ib_uverbs_ah_idr); -DEFINE_IDR(ib_uverbs_cq_idr); -DEFINE_IDR(ib_uverbs_qp_idr); -DEFINE_IDR(ib_uverbs_srq_idr); -DEFINE_IDR(ib_uverbs_xrcd_idr); -DEFINE_IDR(ib_uverbs_rule_idr); -DEFINE_IDR(ib_uverbs_wq_idr); -DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr); - -static DEFINE_SPINLOCK(map_lock); -static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); - -static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) = { - [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, - [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, - [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, - [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, - [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, - [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, - [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr, - [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, - [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, - [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, - [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, - [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, - [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, - [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, - [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, - [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, - [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, - [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, - [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, - [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, - [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, - [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, - [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, - [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, - [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, - [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, - [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, - [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, - [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, - [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, - 
[IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, - [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, - [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, - [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, - [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, -}; - -static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) = { - [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, - [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, - [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, - [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, - [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, - [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, - [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, - [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, - [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, - [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, -}; - -static void ib_uverbs_add_one(struct ib_device *device); -static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); - -int uverbs_dealloc_mw(struct ib_mw *mw) -{ - struct ib_pd *pd = mw->pd; - int ret; - - ret = mw->device->dealloc_mw(mw); - if (!ret) - atomic_dec(&pd->usecnt); - return ret; -} - -static void ib_uverbs_release_dev(struct kobject *kobj) -{ - struct ib_uverbs_device *dev = - container_of(kobj, struct ib_uverbs_device, kobj); - - cleanup_srcu_struct(&dev->disassociate_srcu); - kfree(dev); -} - -static struct kobj_type ib_uverbs_dev_ktype = { - .release = ib_uverbs_release_dev, -}; - -static void ib_uverbs_release_event_file(struct kref *ref) -{ - struct ib_uverbs_event_file *file = - container_of(ref, struct ib_uverbs_event_file, ref); - - kfree(file); -} - -void ib_uverbs_release_ucq(struct ib_uverbs_file *file, - struct ib_uverbs_event_file *ev_file, - struct ib_ucq_object *uobj) -{ - struct ib_uverbs_event *evt, *tmp; - - if (ev_file) { - spin_lock_irq(&ev_file->lock); - list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&ev_file->lock); - - kref_put(&ev_file->ref, ib_uverbs_release_event_file); - } - - spin_lock_irq(&file->async_file->lock); - list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file->lock); -} - -void ib_uverbs_release_uevent(struct ib_uverbs_file *file, - struct ib_uevent_object *uobj) -{ - struct ib_uverbs_event *evt, *tmp; - - spin_lock_irq(&file->async_file->lock); - list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file->lock); -} - -static void ib_uverbs_detach_umcast(struct ib_qp *qp, - struct ib_uqp_object *uobj) -{ - struct ib_uverbs_mcast_entry *mcast, *tmp; - - list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) { - ib_detach_mcast(qp, &mcast->gid, mcast->lid); - list_del(&mcast->list); - kfree(mcast); - } -} - -static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, - struct ib_ucontext *context) -{ - struct ib_uobject *uobj, *tmp; - - context->closing = 1; - - list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { - struct ib_ah *ah = uobj->object; - - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); - ib_destroy_ah(ah); - kfree(uobj); - } - - /* Remove MWs before 
QPs, in order to support type 2A MWs. */ - list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { - struct ib_mw *mw = uobj->object; - - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); - uverbs_dealloc_mw(mw); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { - struct ib_flow *flow_id = uobj->object; - - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); - ib_destroy_flow(flow_id); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { - struct ib_qp *qp = uobj->object; - struct ib_uqp_object *uqp = - container_of(uobj, struct ib_uqp_object, uevent.uobject); - - idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - if (qp == qp->real_qp) - ib_uverbs_detach_umcast(qp, uqp); - ib_destroy_qp(qp); - ib_uverbs_release_uevent(file, &uqp->uevent); - kfree(uqp); - } - - list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) { - struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; - struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; - - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); - ib_destroy_rwq_ind_table(rwq_ind_tbl); - kfree(ind_tbl); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { - struct ib_wq *wq = uobj->object; - struct ib_uwq_object *uwq = - container_of(uobj, struct ib_uwq_object, uevent.uobject); - - idr_remove_uobj(&ib_uverbs_wq_idr, uobj); - ib_destroy_wq(wq); - ib_uverbs_release_uevent(file, &uwq->uevent); - kfree(uwq); - } - - list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { - struct ib_srq *srq = uobj->object; - struct ib_uevent_object *uevent = - container_of(uobj, struct ib_uevent_object, uobject); - - idr_remove_uobj(&ib_uverbs_srq_idr, uobj); - ib_destroy_srq(srq); - ib_uverbs_release_uevent(file, uevent); - kfree(uevent); - } - - list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { - struct ib_cq *cq = uobj->object; - struct ib_uverbs_event_file *ev_file = cq->cq_context; - struct ib_ucq_object *ucq = - container_of(uobj, struct ib_ucq_object, uobject); - - idr_remove_uobj(&ib_uverbs_cq_idr, uobj); - ib_destroy_cq(cq); - ib_uverbs_release_ucq(file, ev_file, ucq); - kfree(ucq); - } - - list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { - struct ib_mr *mr = uobj->object; - - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - ib_dereg_mr(mr); - kfree(uobj); - } - - mutex_lock(&file->device->xrcd_tree_mutex); - list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { - struct ib_xrcd *xrcd = uobj->object; - struct ib_uxrcd_object *uxrcd = - container_of(uobj, struct ib_uxrcd_object, uobject); - - idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); - ib_uverbs_dealloc_xrcd(file->device, xrcd); - kfree(uxrcd); - } - mutex_unlock(&file->device->xrcd_tree_mutex); - - list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { - struct ib_pd *pd = uobj->object; - - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); - ib_dealloc_pd(pd); - kfree(uobj); - } - - put_pid(context->tgid); - - return context->device->dealloc_ucontext(context); -} - -static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) -{ - complete(&dev->comp); -} - -static void ib_uverbs_release_file(struct kref *ref) -{ - struct ib_uverbs_file *file = - container_of(ref, struct ib_uverbs_file, ref); - struct ib_device *ib_dev; - int srcu_key; - - srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (ib_dev && !ib_dev->disassociate_ucontext) - module_put(ib_dev->owner); - 
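- /* Note: the srcu_read_unlock() below closes the read-side section that - * kept ib_dev stable across the srcu_dereference() and module_put() - * above; every reader of device->ib_dev follows this same pattern. - */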
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); - - if (atomic_dec_and_test(&file->device->refcount)) - ib_uverbs_comp_dev(file->device); - - kfree(file); -} - -static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, - size_t count, loff_t *pos) -{ - struct ib_uverbs_event_file *file = filp->private_data; - struct ib_uverbs_event *event; - int eventsz; - int ret = 0; - - spin_lock_irq(&file->lock); - - while (list_empty(&file->event_list)) { - spin_unlock_irq(&file->lock); - - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; - - if (wait_event_interruptible(file->poll_wait, - (!list_empty(&file->event_list) || - /* The barriers built into wait_event_interruptible() - * and wake_up() guarantee this will see the null set - * without using RCU - */ - !file->uverbs_file->device->ib_dev))) - return -ERESTARTSYS; - - /* If the device was disassociated and no event exists, return an error */ - if (list_empty(&file->event_list) && - !file->uverbs_file->device->ib_dev) - return -EIO; - - spin_lock_irq(&file->lock); - } - - event = list_entry(file->event_list.next, struct ib_uverbs_event, list); - - if (file->is_async) - eventsz = sizeof (struct ib_uverbs_async_event_desc); - else - eventsz = sizeof (struct ib_uverbs_comp_event_desc); - - if (eventsz > count) { - ret = -EINVAL; - event = NULL; - } else { - list_del(file->event_list.next); - if (event->counter) { - ++(*event->counter); - list_del(&event->obj_list); - } - } - - spin_unlock_irq(&file->lock); - - if (event) { - if (copy_to_user(buf, event, eventsz)) - ret = -EFAULT; - else - ret = eventsz; - } - - kfree(event); - - return ret; -} - -static unsigned int ib_uverbs_event_poll(struct file *filp, - struct poll_table_struct *wait) -{ - unsigned int pollflags = 0; - struct ib_uverbs_event_file *file = filp->private_data; - - poll_wait(filp, &file->poll_wait, wait); - - spin_lock_irq(&file->lock); - if (!list_empty(&file->event_list)) - pollflags = POLLIN | POLLRDNORM; - spin_unlock_irq(&file->lock); - - return pollflags; -} - -static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) -{ - struct ib_uverbs_event_file *file = filp->private_data; - - return fasync_helper(fd, filp, on, &file->async_queue); -} - -static int ib_uverbs_event_close(struct inode *inode, struct file *filp) -{ - struct ib_uverbs_event_file *file = filp->private_data; - struct ib_uverbs_event *entry, *tmp; - int closed_already = 0; - - mutex_lock(&file->uverbs_file->device->lists_mutex); - spin_lock_irq(&file->lock); - closed_already = file->is_closed; - file->is_closed = 1; - list_for_each_entry_safe(entry, tmp, &file->event_list, list) { - if (entry->counter) - list_del(&entry->obj_list); - kfree(entry); - } - spin_unlock_irq(&file->lock); - if (!closed_already) { - list_del(&file->list); - if (file->is_async) - ib_unregister_event_handler(&file->uverbs_file-> - event_handler); - } - mutex_unlock(&file->uverbs_file->device->lists_mutex); - - kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); - kref_put(&file->ref, ib_uverbs_release_event_file); - - return 0; -} - -static const struct file_operations uverbs_event_fops = { - .owner = THIS_MODULE, - .read = ib_uverbs_event_read, - .poll = ib_uverbs_event_poll, - .release = ib_uverbs_event_close, - .fasync = ib_uverbs_event_fasync, - .llseek = no_llseek, -}; - -void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) -{ - struct ib_uverbs_event_file *file = cq_context; - struct ib_ucq_object *uobj; - struct ib_uverbs_event *entry; - unsigned long flags; - - if (!file) - 
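- /* a NULL cq_context means the CQ was created without a completion - * channel, so there is no event file to deliver to - */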
return; - - spin_lock_irqsave(&file->lock, flags); - if (file->is_closed) { - spin_unlock_irqrestore(&file->lock, flags); - return; - } - - entry = kmalloc(sizeof *entry, GFP_ATOMIC); - if (!entry) { - spin_unlock_irqrestore(&file->lock, flags); - return; - } - - uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); - - entry->desc.comp.cq_handle = cq->uobject->user_handle; - entry->counter = &uobj->comp_events_reported; - - list_add_tail(&entry->list, &file->event_list); - list_add_tail(&entry->obj_list, &uobj->comp_list); - spin_unlock_irqrestore(&file->lock, flags); - - wake_up_interruptible(&file->poll_wait); - linux_poll_wakeup(file->filp); - kill_fasync(&file->async_queue, SIGIO, POLL_IN); -} - -static void ib_uverbs_async_handler(struct ib_uverbs_file *file, - __u64 element, __u64 event, - struct list_head *obj_list, - u32 *counter) -{ - struct ib_uverbs_event *entry; - unsigned long flags; - - spin_lock_irqsave(&file->async_file->lock, flags); - if (file->async_file->is_closed) { - spin_unlock_irqrestore(&file->async_file->lock, flags); - return; - } - - entry = kmalloc(sizeof *entry, GFP_ATOMIC); - if (!entry) { - spin_unlock_irqrestore(&file->async_file->lock, flags); - return; - } - - entry->desc.async.element = element; - entry->desc.async.event_type = event; - entry->desc.async.reserved = 0; - entry->counter = counter; - - list_add_tail(&entry->list, &file->async_file->event_list); - if (obj_list) - list_add_tail(&entry->obj_list, obj_list); - spin_unlock_irqrestore(&file->async_file->lock, flags); - - wake_up_interruptible(&file->async_file->poll_wait); - linux_poll_wakeup(file->async_file->filp); - kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); -} - -void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) -{ - struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, - struct ib_ucq_object, uobject); - - ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, - event->event, &uobj->async_list, - &uobj->async_events_reported); -} - -void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) -{ - struct ib_uevent_object *uobj; - - /* for XRC target qp's, check that qp is live */ - if (!event->element.qp->uobject || !event->element.qp->uobject->live) - return; - - uobj = container_of(event->element.qp->uobject, - struct ib_uevent_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, - event->event, &uobj->event_list, - &uobj->events_reported); -} - -void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) -{ - struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, - struct ib_uevent_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, - event->event, &uobj->event_list, - &uobj->events_reported); -} - -void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) -{ - struct ib_uevent_object *uobj; - - uobj = container_of(event->element.srq->uobject, - struct ib_uevent_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, - event->event, &uobj->event_list, - &uobj->events_reported); -} - -void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event) -{ - struct ib_uverbs_file *file = - container_of(handler, struct ib_uverbs_file, event_handler); - - ib_uverbs_async_handler(file, event->element.port_num, event->event, - NULL, NULL); -} - -void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) -{ - 
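- /* Drop the reference taken when the async event file was created; - * ib_uverbs_release_event_file() frees it once the last ref is gone. - */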
kref_put(&file->async_file->ref, ib_uverbs_release_event_file); - file->async_file = NULL; -} - -struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - struct ib_device *ib_dev, - int is_async) -{ - struct ib_uverbs_event_file *ev_file; - struct file *filp; - int ret; - - ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL); - if (!ev_file) - return ERR_PTR(-ENOMEM); - - kref_init(&ev_file->ref); - spin_lock_init(&ev_file->lock); - INIT_LIST_HEAD(&ev_file->event_list); - init_waitqueue_head(&ev_file->poll_wait); - ev_file->uverbs_file = uverbs_file; - kref_get(&ev_file->uverbs_file->ref); - ev_file->async_queue = NULL; - ev_file->is_closed = 0; - - /* - * fops_get() can't fail here, because we're coming from a - * system call on a uverbs file, which will already have a - * module reference. - */ - filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops)); - if (IS_ERR(filp)) - goto err_put_refs; - filp->private_data = ev_file; - ev_file->filp = filp; - - mutex_lock(&uverbs_file->device->lists_mutex); - list_add_tail(&ev_file->list, - &uverbs_file->device->uverbs_events_file_list); - mutex_unlock(&uverbs_file->device->lists_mutex); - - if (is_async) { - WARN_ON(uverbs_file->async_file); - uverbs_file->async_file = ev_file; - kref_get(&uverbs_file->async_file->ref); - INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, - ib_dev, - ib_uverbs_event_handler); - ret = ib_register_event_handler(&uverbs_file->event_handler); - if (ret) - goto err_put_file; - - /* At this point the async file is fully set up */ - ev_file->is_async = 1; - } - - return filp; - -err_put_file: - fput(filp); - kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file); - uverbs_file->async_file = NULL; - return ERR_PTR(ret); - -err_put_refs: - kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); - kref_put(&ev_file->ref, ib_uverbs_release_event_file); - return filp; -} - -/* - * Look up a completion event file by FD. If lookup is successful, - * takes a ref to the event file struct that it returns; if - * unsuccessful, returns NULL. 
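- * - * The reference taken here must later be dropped by the caller with - * kref_put(&ev_file->ref, ib_uverbs_release_event_file).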
- */ -struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) -{ - struct ib_uverbs_event_file *ev_file = NULL; - struct fd f = fdget(fd); - - if (!f.file) - return NULL; - - if (f.file->f_op != &uverbs_event_fops) - goto out; - - ev_file = f.file->private_data; - if (ev_file->is_async) { - ev_file = NULL; - goto out; - } - - kref_get(&ev_file->ref); - -out: - fdput(f); - return ev_file; -} - -static int verify_command_mask(struct ib_device *ib_dev, __u32 command) -{ - u64 mask; - - if (command <= IB_USER_VERBS_CMD_OPEN_QP) - mask = ib_dev->uverbs_cmd_mask; - else - mask = ib_dev->uverbs_ex_cmd_mask; - - if (mask & ((u64)1 << command)) - return 0; - - return -1; -} - -static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, - size_t count, loff_t *pos) -{ - struct ib_uverbs_file *file = filp->private_data; - struct ib_device *ib_dev; - struct ib_uverbs_cmd_hdr hdr; - __u32 command; - __u32 flags; - int srcu_key; - ssize_t ret; - - if (WARN_ON_ONCE(!ib_safe_file_access(filp))) - return -EACCES; - - if (count < sizeof hdr) - return -EINVAL; - - if (copy_from_user(&hdr, buf, sizeof hdr)) - return -EFAULT; - - srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - ret = -EIO; - goto out; - } - - if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | - IB_USER_VERBS_CMD_COMMAND_MASK)) { - ret = -EINVAL; - goto out; - } - - command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; - if (verify_command_mask(ib_dev, command)) { - ret = -EOPNOTSUPP; - goto out; - } - - if (!file->ucontext && - command != IB_USER_VERBS_CMD_GET_CONTEXT) { - ret = -EINVAL; - goto out; - } - - flags = (hdr.command & - IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; - - if (!flags) { - if (command >= ARRAY_SIZE(uverbs_cmd_table) || - !uverbs_cmd_table[command]) { - ret = -EINVAL; - goto out; - } - - if (hdr.in_words * 4 != count) { - ret = -EINVAL; - goto out; - } - - ret = uverbs_cmd_table[command](file, ib_dev, - buf + sizeof(hdr), - hdr.in_words * 4, - hdr.out_words * 4); - - } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { - struct ib_uverbs_ex_cmd_hdr ex_hdr; - struct ib_udata ucore; - struct ib_udata uhw; - size_t written_count = count; - - if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || - !uverbs_ex_cmd_table[command]) { - ret = -ENOSYS; - goto out; - } - - if (!file->ucontext) { - ret = -EINVAL; - goto out; - } - - if (count < (sizeof(hdr) + sizeof(ex_hdr))) { - ret = -EINVAL; - goto out; - } - - if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) { - ret = -EFAULT; - goto out; - } - - count -= sizeof(hdr) + sizeof(ex_hdr); - buf += sizeof(hdr) + sizeof(ex_hdr); - - if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) { - ret = -EINVAL; - goto out; - } - - if (ex_hdr.cmd_hdr_reserved) { - ret = -EINVAL; - goto out; - } - - if (ex_hdr.response) { - if (!hdr.out_words && !ex_hdr.provider_out_words) { - ret = -EINVAL; - goto out; - } - - if (!access_ok(VERIFY_WRITE, - (void __user *) (unsigned long) ex_hdr.response, - (hdr.out_words + ex_hdr.provider_out_words) * 8)) { - ret = -EFAULT; - goto out; - } - } else { - if (hdr.out_words || ex_hdr.provider_out_words) { - ret = -EINVAL; - goto out; - } - } - - INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response, - hdr.in_words * 8, hdr.out_words * 8); - - INIT_UDATA_BUF_OR_NULL(&uhw, - buf + ucore.inlen, - (unsigned long) ex_hdr.response + ucore.outlen, - 
ex_hdr.provider_in_words * 8, - ex_hdr.provider_out_words * 8); - - ret = uverbs_ex_cmd_table[command](file, - ib_dev, - &ucore, - &uhw); - if (!ret) - ret = written_count; - } else { - ret = -ENOSYS; - } - -out: - srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); - return ret; -} - -static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct ib_uverbs_file *file = filp->private_data; - struct ib_device *ib_dev; - int ret = 0; - int srcu_key; - - srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - ret = -EIO; - goto out; - } - - if (!file->ucontext) - ret = -ENODEV; - else - ret = ib_dev->mmap(file->ucontext, vma); -out: - srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); - return ret; -} - -/* - * ib_uverbs_open() does not need the BKL: - * - * - the ib_uverbs_device structures are properly reference counted and - * everything else is purely local to the file being created, so - * races against other open calls are not a problem; - * - there is no ioctl method to race against; - * - the open method will either immediately return -ENXIO, or all - * required initialization will be done. - */ -static int ib_uverbs_open(struct inode *inode, struct file *filp) -{ - struct ib_uverbs_device *dev; - struct ib_uverbs_file *file; - struct ib_device *ib_dev; - int ret; - int module_dependent; - int srcu_key; - - dev = container_of(inode->i_cdev->si_drv1, struct ib_uverbs_device, cdev); - if (!atomic_inc_not_zero(&dev->refcount)) - return -ENXIO; - - srcu_key = srcu_read_lock(&dev->disassociate_srcu); - mutex_lock(&dev->lists_mutex); - ib_dev = srcu_dereference(dev->ib_dev, - &dev->disassociate_srcu); - if (!ib_dev) { - ret = -EIO; - goto err; - } - - /* If the IB device supports disassociating the ucontext, there is no - * hard module dependency between the uverbs device and its low-level - * device. 
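- * When ->disassociate_ucontext is provided the driver module may be - * unloaded while files stay open, so the try_module_get() below is - * skipped; a disassociated device instead fails later calls with -EIO.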
- */ - module_dependent = !(ib_dev->disassociate_ucontext); - - if (module_dependent) { - if (!try_module_get(ib_dev->owner)) { - ret = -ENODEV; - goto err; - } - } - - file = kzalloc(sizeof(*file), GFP_KERNEL); - if (!file) { - ret = -ENOMEM; - if (module_dependent) - goto err_module; - - goto err; - } - - file->device = dev; - file->ucontext = NULL; - file->async_file = NULL; - kref_init(&file->ref); - mutex_init(&file->mutex); - mutex_init(&file->cleanup_mutex); - - filp->private_data = file; - kobject_get(&dev->kobj); - list_add_tail(&file->list, &dev->uverbs_file_list); - mutex_unlock(&dev->lists_mutex); - srcu_read_unlock(&dev->disassociate_srcu, srcu_key); - - return nonseekable_open(inode, filp); - -err_module: - module_put(ib_dev->owner); - -err: - mutex_unlock(&dev->lists_mutex); - srcu_read_unlock(&dev->disassociate_srcu, srcu_key); - if (atomic_dec_and_test(&dev->refcount)) - ib_uverbs_comp_dev(dev); - - return ret; -} - -static int ib_uverbs_close(struct inode *inode, struct file *filp) -{ - struct ib_uverbs_file *file = filp->private_data; - struct ib_uverbs_device *dev = file->device; - - mutex_lock(&file->cleanup_mutex); - if (file->ucontext) { - ib_uverbs_cleanup_ucontext(file, file->ucontext); - file->ucontext = NULL; - } - mutex_unlock(&file->cleanup_mutex); - - mutex_lock(&file->device->lists_mutex); - if (!file->is_closed) { - list_del(&file->list); - file->is_closed = 1; - } - mutex_unlock(&file->device->lists_mutex); - - if (file->async_file) - kref_put(&file->async_file->ref, ib_uverbs_release_event_file); - - kref_put(&file->ref, ib_uverbs_release_file); - kobject_put(&dev->kobj); - - return 0; -} - -static const struct file_operations uverbs_fops = { - .owner = THIS_MODULE, - .write = ib_uverbs_write, - .open = ib_uverbs_open, - .release = ib_uverbs_close, - .llseek = no_llseek, -}; - -static const struct file_operations uverbs_mmap_fops = { - .owner = THIS_MODULE, - .write = ib_uverbs_write, - .mmap = ib_uverbs_mmap, - .open = ib_uverbs_open, - .release = ib_uverbs_close, - .llseek = no_llseek, -}; - -static struct ib_client uverbs_client = { - .name = "uverbs", - .add = ib_uverbs_add_one, - .remove = ib_uverbs_remove_one -}; - -static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, - char *buf) -{ - int ret = -ENODEV; - int srcu_key; - struct ib_uverbs_device *dev = dev_get_drvdata(device); - struct ib_device *ib_dev; - - if (!dev) - return -ENODEV; - - srcu_key = srcu_read_lock(&dev->disassociate_srcu); - ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); - if (ib_dev) - ret = sprintf(buf, "%s\n", ib_dev->name); - srcu_read_unlock(&dev->disassociate_srcu, srcu_key); - - return ret; -} -static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); - -static ssize_t show_dev_abi_version(struct device *device, - struct device_attribute *attr, char *buf) -{ - struct ib_uverbs_device *dev = dev_get_drvdata(device); - int ret = -ENODEV; - int srcu_key; - struct ib_device *ib_dev; - - if (!dev) - return -ENODEV; - srcu_key = srcu_read_lock(&dev->disassociate_srcu); - ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); - if (ib_dev) - ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver); - srcu_read_unlock(&dev->disassociate_srcu, srcu_key); - - return ret; -} -static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); - -static CLASS_ATTR_STRING(abi_version, S_IRUGO, - __stringify(IB_USER_VERBS_ABI_VERSION)); - -static dev_t overflow_maj; -static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES); - -/* - * If we 
have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by - * requesting a new major number and doubling the number of max devices we - * support. It's stupid, but simple. - */ -static int find_overflow_devnum(void) -{ - int ret; - - if (!overflow_maj) { - ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES, - "infiniband_verbs"); - if (ret) { - pr_err("user_verbs: couldn't register dynamic device number\n"); - return ret; - } - } - - ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES); - if (ret >= IB_UVERBS_MAX_DEVICES) - return -1; - - return ret; -} - -static void ib_uverbs_add_one(struct ib_device *device) -{ - int devnum; - dev_t base; - struct ib_uverbs_device *uverbs_dev; - int ret; - - if (!device->alloc_ucontext) - return; - - uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL); - if (!uverbs_dev) - return; - - ret = init_srcu_struct(&uverbs_dev->disassociate_srcu); - if (ret) { - kfree(uverbs_dev); - return; - } - - atomic_set(&uverbs_dev->refcount, 1); - init_completion(&uverbs_dev->comp); - uverbs_dev->xrcd_tree = RB_ROOT; - mutex_init(&uverbs_dev->xrcd_tree_mutex); - kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype); - mutex_init(&uverbs_dev->lists_mutex); - INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); - INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); - - spin_lock(&map_lock); - devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); - if (devnum >= IB_UVERBS_MAX_DEVICES) { - spin_unlock(&map_lock); - devnum = find_overflow_devnum(); - if (devnum < 0) - goto err; - - spin_lock(&map_lock); - uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES; - base = devnum + overflow_maj; - set_bit(devnum, overflow_map); - } else { - uverbs_dev->devnum = devnum; - base = devnum + IB_UVERBS_BASE_DEV; - set_bit(devnum, dev_map); - } - spin_unlock(&map_lock); - - rcu_assign_pointer(uverbs_dev->ib_dev, device); - uverbs_dev->num_comp_vectors = device->num_comp_vectors; - - cdev_init(&uverbs_dev->cdev, NULL); - uverbs_dev->cdev.owner = THIS_MODULE; - uverbs_dev->cdev.ops = device->mmap ? 
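- /* expose the mmap entry point only when the driver implements one */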
&uverbs_mmap_fops : &uverbs_fops; - uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj; - kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); - if (cdev_add(&uverbs_dev->cdev, base, 1)) - goto err_cdev; - - uverbs_dev->dev = device_create(uverbs_class, device->dma_device, - uverbs_dev->cdev.dev, uverbs_dev, - "uverbs%d", uverbs_dev->devnum); - if (IS_ERR(uverbs_dev->dev)) - goto err_cdev; - - if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) - goto err_class; - if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) - goto err_class; - - ib_set_client_data(device, &uverbs_client, uverbs_dev); - - return; - -err_class: - device_destroy(uverbs_class, uverbs_dev->cdev.dev); - -err_cdev: - cdev_del(&uverbs_dev->cdev); - if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) - clear_bit(devnum, dev_map); - else - clear_bit(devnum, overflow_map); - -err: - if (atomic_dec_and_test(&uverbs_dev->refcount)) - ib_uverbs_comp_dev(uverbs_dev); - wait_for_completion(&uverbs_dev->comp); - kobject_put(&uverbs_dev->kobj); - return; -} - -static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, - struct ib_device *ib_dev) -{ - struct ib_uverbs_file *file; - struct ib_uverbs_event_file *event_file; - struct ib_event event; - - /* Wait for pending commands to terminate */ - synchronize_srcu(&uverbs_dev->disassociate_srcu); - event.event = IB_EVENT_DEVICE_FATAL; - event.element.port_num = 0; - event.device = ib_dev; - - mutex_lock(&uverbs_dev->lists_mutex); - while (!list_empty(&uverbs_dev->uverbs_file_list)) { - struct ib_ucontext *ucontext; - file = list_first_entry(&uverbs_dev->uverbs_file_list, - struct ib_uverbs_file, list); - file->is_closed = 1; - list_del(&file->list); - kref_get(&file->ref); - mutex_unlock(&uverbs_dev->lists_mutex); - - ib_uverbs_event_handler(&file->event_handler, &event); - - mutex_lock(&file->cleanup_mutex); - ucontext = file->ucontext; - file->ucontext = NULL; - mutex_unlock(&file->cleanup_mutex); - - /* At this point ib_uverbs_close cannot be running - * ib_uverbs_cleanup_ucontext - */ - if (ucontext) { - /* We must release the mutex before going ahead and - * calling disassociate_ucontext. disassociate_ucontext - * might end up indirectly calling uverbs_close, - * for example due to freeing the resources - * (e.g. mmput). 
- */ - ib_dev->disassociate_ucontext(ucontext); - ib_uverbs_cleanup_ucontext(file, ucontext); - } - - mutex_lock(&uverbs_dev->lists_mutex); - kref_put(&file->ref, ib_uverbs_release_file); - } - - while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { - event_file = list_first_entry(&uverbs_dev-> - uverbs_events_file_list, - struct ib_uverbs_event_file, - list); - spin_lock_irq(&event_file->lock); - event_file->is_closed = 1; - spin_unlock_irq(&event_file->lock); - - list_del(&event_file->list); - if (event_file->is_async) { - ib_unregister_event_handler(&event_file->uverbs_file-> - event_handler); - event_file->uverbs_file->event_handler.device = NULL; - } - - wake_up_interruptible(&event_file->poll_wait); - linux_poll_wakeup(event_file->filp); - kill_fasync(&event_file->async_queue, SIGIO, POLL_IN); - } - mutex_unlock(&uverbs_dev->lists_mutex); -} - -static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) -{ - struct ib_uverbs_device *uverbs_dev = client_data; - int wait_clients = 1; - - if (!uverbs_dev) - return; - - dev_set_drvdata(uverbs_dev->dev, NULL); - device_destroy(uverbs_class, uverbs_dev->cdev.dev); - cdev_del(&uverbs_dev->cdev); - - if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) - clear_bit(uverbs_dev->devnum, dev_map); - else - clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); - - if (device->disassociate_ucontext) { - /* We disassociate HW resources and immediately return. - * Userspace will see an EIO errno for all future access. - * Upon returning, ib_device may be freed internally and is not - * valid any more. - * uverbs_device is still available until all clients close - * their files; then the uverbs device ref count will be zero - * and its resources will be freed. - * Note: At this point no more files can be opened since the - * cdev was deleted; however, active clients can still issue - * commands and close their open files. 
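- * Clearing uverbs_dev->ib_dev below, together with the - * srcu_dereference() on every entry path, is what turns those later - * commands into -EIO.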
- */ - rcu_assign_pointer(uverbs_dev->ib_dev, NULL); - ib_uverbs_free_hw_resources(uverbs_dev, device); - wait_clients = 0; - } - - if (atomic_dec_and_test(&uverbs_dev->refcount)) - ib_uverbs_comp_dev(uverbs_dev); - if (wait_clients) - wait_for_completion(&uverbs_dev->comp); - kobject_put(&uverbs_dev->kobj); -} - -static char *uverbs_devnode(struct device *dev, umode_t *mode) -{ - if (mode) - *mode = 0666; - return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); -} - -static int __init ib_uverbs_init(void) -{ - int ret; - - ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, - "infiniband_verbs"); - if (ret) { - pr_err("user_verbs: couldn't register device number\n"); - goto out; - } - - uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); - if (IS_ERR(uverbs_class)) { - ret = PTR_ERR(uverbs_class); - pr_err("user_verbs: couldn't create class infiniband_verbs\n"); - goto out_chrdev; - } - - uverbs_class->devnode = uverbs_devnode; - - ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); - if (ret) { - pr_err("user_verbs: couldn't create abi_version attribute\n"); - goto out_class; - } - - ret = ib_register_client(&uverbs_client); - if (ret) { - pr_err("user_verbs: couldn't register client\n"); - goto out_class; - } - - return 0; - -out_class: - class_destroy(uverbs_class); - -out_chrdev: - unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); - -out: - return ret; -} - -static void __exit ib_uverbs_cleanup(void) -{ - ib_unregister_client(&uverbs_client); - class_destroy(uverbs_class); - unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); - if (overflow_maj) - unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); - idr_destroy(&ib_uverbs_pd_idr); - idr_destroy(&ib_uverbs_mr_idr); - idr_destroy(&ib_uverbs_mw_idr); - idr_destroy(&ib_uverbs_ah_idr); - idr_destroy(&ib_uverbs_cq_idr); - idr_destroy(&ib_uverbs_qp_idr); - idr_destroy(&ib_uverbs_srq_idr); -} - -module_init_order(ib_uverbs_init, SI_ORDER_THIRD); -module_exit(ib_uverbs_cleanup); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs_main.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/agent.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/agent.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/agent.c (nonexistent) @@ -1,222 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. - * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include -#include - -#include "agent.h" -#include "smi.h" -#include "mad_priv.h" - -#define SPFX "ib_agent: " - -struct ib_agent_port_private { - struct list_head port_list; - struct ib_mad_agent *agent[2]; -}; - -static DEFINE_SPINLOCK(ib_agent_port_list_lock); -static LIST_HEAD(ib_agent_port_list); - -static struct ib_agent_port_private * -__ib_get_agent_port(const struct ib_device *device, int port_num) -{ - struct ib_agent_port_private *entry; - - list_for_each_entry(entry, &ib_agent_port_list, port_list) { - if (entry->agent[1]->device == device && - entry->agent[1]->port_num == port_num) - return entry; - } - return NULL; -} - -static struct ib_agent_port_private * -ib_get_agent_port(const struct ib_device *device, int port_num) -{ - struct ib_agent_port_private *entry; - unsigned long flags; - - spin_lock_irqsave(&ib_agent_port_list_lock, flags); - entry = __ib_get_agent_port(device, port_num); - spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - return entry; -} - -void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *grh, - const struct ib_wc *wc, const struct ib_device *device, - int port_num, int qpn, size_t resp_mad_len, bool opa) -{ - struct ib_agent_port_private *port_priv; - struct ib_mad_agent *agent; - struct ib_mad_send_buf *send_buf; - struct ib_ah *ah; - struct ib_mad_send_wr_private *mad_send_wr; - - if (rdma_cap_ib_switch(device)) - port_priv = ib_get_agent_port(device, 0); - else - port_priv = ib_get_agent_port(device, port_num); - - if (!port_priv) { - dev_err(&device->dev, "Unable to find port agent\n"); - return; - } - - agent = port_priv->agent[qpn]; - ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num); - if (IS_ERR(ah)) { - dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n", - PTR_ERR(ah)); - return; - } - - if (opa && mad_hdr->base_version != OPA_MGMT_BASE_VERSION) - resp_mad_len = IB_MGMT_MAD_SIZE; - - send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0, - IB_MGMT_MAD_HDR, - resp_mad_len - IB_MGMT_MAD_HDR, - GFP_KERNEL, - mad_hdr->base_version); - if (IS_ERR(send_buf)) { - dev_err(&device->dev, "ib_create_send_mad error\n"); - goto err1; - } - - memcpy(send_buf->mad, mad_hdr, resp_mad_len); - send_buf->ah = ah; - - if (rdma_cap_ib_switch(device)) { - mad_send_wr = 
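- /* on a switch the egress port must be set in the private send WR, - * hence the container_of() back-cast from the public send_buf: - */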
container_of(send_buf, - struct ib_mad_send_wr_private, - send_buf); - mad_send_wr->send_wr.port_num = port_num; - } - - if (ib_post_send_mad(send_buf, NULL)) { - dev_err(&device->dev, "ib_post_send_mad error\n"); - goto err2; - } - return; -err2: - ib_free_send_mad(send_buf); -err1: - ib_destroy_ah(ah); -} - -static void agent_send_handler(struct ib_mad_agent *mad_agent, - struct ib_mad_send_wc *mad_send_wc) -{ - ib_destroy_ah(mad_send_wc->send_buf->ah); - ib_free_send_mad(mad_send_wc->send_buf); -} - -int ib_agent_port_open(struct ib_device *device, int port_num) -{ - struct ib_agent_port_private *port_priv; - unsigned long flags; - int ret; - - /* Create new device info */ - port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); - if (!port_priv) { - dev_err(&device->dev, "No memory for ib_agent_port_private\n"); - ret = -ENOMEM; - goto error1; - } - - if (rdma_cap_ib_smi(device, port_num)) { - /* Obtain send only MAD agent for SMI QP */ - port_priv->agent[0] = ib_register_mad_agent(device, port_num, - IB_QPT_SMI, NULL, 0, - &agent_send_handler, - NULL, NULL, 0); - if (IS_ERR(port_priv->agent[0])) { - ret = PTR_ERR(port_priv->agent[0]); - goto error2; - } - } - - /* Obtain send only MAD agent for GSI QP */ - port_priv->agent[1] = ib_register_mad_agent(device, port_num, - IB_QPT_GSI, NULL, 0, - &agent_send_handler, - NULL, NULL, 0); - if (IS_ERR(port_priv->agent[1])) { - ret = PTR_ERR(port_priv->agent[1]); - goto error3; - } - - spin_lock_irqsave(&ib_agent_port_list_lock, flags); - list_add_tail(&port_priv->port_list, &ib_agent_port_list); - spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - - return 0; - -error3: - if (port_priv->agent[0]) - ib_unregister_mad_agent(port_priv->agent[0]); -error2: - kfree(port_priv); -error1: - return ret; -} - -int ib_agent_port_close(struct ib_device *device, int port_num) -{ - struct ib_agent_port_private *port_priv; - unsigned long flags; - - spin_lock_irqsave(&ib_agent_port_list_lock, flags); - port_priv = __ib_get_agent_port(device, port_num); - if (port_priv == NULL) { - spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - dev_err(&device->dev, "Port %d not found\n", port_num); - return -ENODEV; - } - list_del(&port_priv->port_list); - spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - - ib_unregister_mad_agent(port_priv->agent[1]); - if (port_priv->agent[0]) - ib_unregister_mad_agent(port_priv->agent[0]); - - kfree(port_priv); - return 0; -} Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/agent.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/cm.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/cm.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/cm.c (nonexistent) @@ -1,4141 +0,0 @@ -/* - * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include "cm_msgs.h" - -MODULE_AUTHOR("Sean Hefty"); -MODULE_DESCRIPTION("InfiniBand CM"); -MODULE_LICENSE("Dual BSD/GPL"); - -static void cm_add_one(struct ib_device *device); -static void cm_remove_one(struct ib_device *device, void *client_data); - -static struct ib_client cm_client = { - .name = "cm", - .add = cm_add_one, - .remove = cm_remove_one -}; - -static struct ib_cm { - spinlock_t lock; - struct list_head device_list; - rwlock_t device_lock; - struct rb_root listen_service_table; - u64 listen_service_id; - /* struct rb_root peer_service_table; todo: fix peer to peer */ - struct rb_root remote_qp_table; - struct rb_root remote_id_table; - struct rb_root remote_sidr_table; - struct idr local_id_table; - __be32 random_id_operand; - struct list_head timewait_list; - struct workqueue_struct *wq; - /* Sync on cm change port state */ - spinlock_t state_lock; -} cm; - -/* Counter indexes ordered by attribute ID */ -enum { - CM_REQ_COUNTER, - CM_MRA_COUNTER, - CM_REJ_COUNTER, - CM_REP_COUNTER, - CM_RTU_COUNTER, - CM_DREQ_COUNTER, - CM_DREP_COUNTER, - CM_SIDR_REQ_COUNTER, - CM_SIDR_REP_COUNTER, - CM_LAP_COUNTER, - CM_APR_COUNTER, - CM_ATTR_COUNT, - CM_ATTR_ID_OFFSET = 0x0010, -}; - -enum { - CM_XMIT, - CM_XMIT_RETRIES, - CM_RECV, - CM_RECV_DUPLICATES, - CM_COUNTER_GROUPS -}; - -static char const counter_group_names[CM_COUNTER_GROUPS] - [sizeof("cm_rx_duplicates")] = { - "cm_tx_msgs", "cm_tx_retries", - "cm_rx_msgs", "cm_rx_duplicates" -}; - -struct cm_counter_group { - struct kobject obj; - atomic_long_t counter[CM_ATTR_COUNT]; -}; - -struct cm_counter_attribute { - struct attribute attr; - int index; -}; - -#define CM_COUNTER_ATTR(_name, _index) \ -struct cm_counter_attribute cm_##_name##_counter_attr = { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .index = _index \ -} - -static CM_COUNTER_ATTR(req, CM_REQ_COUNTER); -static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER); -static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER); -static CM_COUNTER_ATTR(rep, CM_REP_COUNTER); -static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER); -static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER); -static 
CM_COUNTER_ATTR(drep, CM_DREP_COUNTER); -static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER); -static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER); -static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER); -static CM_COUNTER_ATTR(apr, CM_APR_COUNTER); - -static struct attribute *cm_counter_default_attrs[] = { - &cm_req_counter_attr.attr, - &cm_mra_counter_attr.attr, - &cm_rej_counter_attr.attr, - &cm_rep_counter_attr.attr, - &cm_rtu_counter_attr.attr, - &cm_dreq_counter_attr.attr, - &cm_drep_counter_attr.attr, - &cm_sidr_req_counter_attr.attr, - &cm_sidr_rep_counter_attr.attr, - &cm_lap_counter_attr.attr, - &cm_apr_counter_attr.attr, - NULL -}; - -struct cm_port { - struct cm_device *cm_dev; - struct ib_mad_agent *mad_agent; - struct kobject port_obj; - u8 port_num; - struct list_head cm_priv_prim_list; - struct list_head cm_priv_altr_list; - struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; -}; - -struct cm_device { - struct list_head list; - struct ib_device *ib_device; - struct device *device; - u8 ack_delay; - int going_down; - struct cm_port *port[0]; -}; - -struct cm_av { - struct cm_port *port; - union ib_gid dgid; - struct ib_ah_attr ah_attr; - u16 pkey_index; - u8 timeout; -}; - -struct cm_work { - struct delayed_work work; - struct list_head list; - struct cm_port *port; - struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */ - __be32 local_id; /* Established / timewait */ - __be32 remote_id; - struct ib_cm_event cm_event; - struct ib_sa_path_rec path[0]; -}; - -struct cm_timewait_info { - struct cm_work work; /* Must be first. */ - struct list_head list; - struct rb_node remote_qp_node; - struct rb_node remote_id_node; - __be64 remote_ca_guid; - __be32 remote_qpn; - u8 inserted_remote_qp; - u8 inserted_remote_id; -}; - -struct cm_id_private { - struct ib_cm_id id; - - struct rb_node service_node; - struct rb_node sidr_id_node; - spinlock_t lock; /* Do not acquire inside cm.lock */ - struct completion comp; - atomic_t refcount; - /* Number of clients sharing this ib_cm_id. Only valid for listeners. - * Protected by the cm.lock spinlock. 
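- * The listener itself is only torn down once this count drops back to - * zero; until then destroying a shared id merely releases one share.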
*/
- int listen_sharecount;
-
- struct ib_mad_send_buf *msg;
- struct cm_timewait_info *timewait_info;
- /* todo: use alternate port on send failure */
- struct cm_av av;
- struct cm_av alt_av;
-
- void *private_data;
- __be64 tid;
- __be32 local_qpn;
- __be32 remote_qpn;
- enum ib_qp_type qp_type;
- __be32 sq_psn;
- __be32 rq_psn;
- int timeout_ms;
- enum ib_mtu path_mtu;
- __be16 pkey;
- u8 private_data_len;
- u8 max_cm_retries;
- u8 peer_to_peer;
- u8 responder_resources;
- u8 initiator_depth;
- u8 retry_count;
- u8 rnr_retry_count;
- u8 service_timeout;
- u8 target_ack_delay;
-
- struct list_head prim_list;
- struct list_head altr_list;
- /* Indicates that the send port mad is registered and av is set */
- int prim_send_port_not_ready;
- int altr_send_port_not_ready;
-
- struct list_head work_list;
- atomic_t work_count;
-};
-
-static void cm_work_handler(struct work_struct *work);
-
-static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
-{
- if (atomic_dec_and_test(&cm_id_priv->refcount))
- complete(&cm_id_priv->comp);
-}
-
-static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
- struct ib_mad_send_buf **msg)
-{
- struct ib_mad_agent *mad_agent;
- struct ib_mad_send_buf *m;
- struct ib_ah *ah;
- struct cm_av *av;
- unsigned long flags, flags2;
- int ret = 0;
-
- /* don't let the port be released until the agent is down */
- spin_lock_irqsave(&cm.state_lock, flags2);
- spin_lock_irqsave(&cm.lock, flags);
- if (!cm_id_priv->prim_send_port_not_ready)
- av = &cm_id_priv->av;
- else if (!cm_id_priv->altr_send_port_not_ready &&
- (cm_id_priv->alt_av.port))
- av = &cm_id_priv->alt_av;
- else {
- pr_info("%s: not a valid CM id\n", __func__);
- ret = -ENODEV;
- spin_unlock_irqrestore(&cm.lock, flags);
- goto out;
- }
- spin_unlock_irqrestore(&cm.lock, flags);
- /* Make sure the port hasn't released the MAD agent yet */
- mad_agent = cm_id_priv->av.port->mad_agent;
- if (!mad_agent) {
- pr_info("%s: not a valid MAD agent\n", __func__);
- ret = -ENODEV;
- goto out;
- }
- ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
- if (IS_ERR(ah)) {
- ret = PTR_ERR(ah);
- goto out;
- }
-
- m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
- av->pkey_index,
- 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
- GFP_ATOMIC,
- IB_MGMT_BASE_VERSION);
- if (IS_ERR(m)) {
- ib_destroy_ah(ah);
- ret = PTR_ERR(m);
- goto out;
- }
-
- /* Timeout set by caller if response is expected.
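- * (e.g. ib_send_cm_req(), ib_send_cm_rep() and ib_send_cm_dreq() fill in
- * msg->timeout_ms and msg->context[1]; one-shot replies such as the RTU
- * and DREP do not.)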
*/ - m->ah = ah; - m->retries = cm_id_priv->max_cm_retries; - - atomic_inc(&cm_id_priv->refcount); - m->context[0] = cm_id_priv; - *msg = m; - -out: - spin_unlock_irqrestore(&cm.state_lock, flags2); - return ret; -} - -static int cm_alloc_response_msg(struct cm_port *port, - struct ib_mad_recv_wc *mad_recv_wc, - struct ib_mad_send_buf **msg) -{ - struct ib_mad_send_buf *m; - struct ib_ah *ah; - - ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc, - mad_recv_wc->recv_buf.grh, port->port_num); - if (IS_ERR(ah)) - return PTR_ERR(ah); - - m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, - 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, - GFP_ATOMIC, - IB_MGMT_BASE_VERSION); - if (IS_ERR(m)) { - ib_destroy_ah(ah); - return PTR_ERR(m); - } - m->ah = ah; - *msg = m; - return 0; -} - -static void cm_free_msg(struct ib_mad_send_buf *msg) -{ - ib_destroy_ah(msg->ah); - if (msg->context[0]) - cm_deref_id(msg->context[0]); - ib_free_send_mad(msg); -} - -static void * cm_copy_private_data(const void *private_data, - u8 private_data_len) -{ - void *data; - - if (!private_data || !private_data_len) - return NULL; - - data = kmemdup(private_data, private_data_len, GFP_KERNEL); - if (!data) - return ERR_PTR(-ENOMEM); - - return data; -} - -static void cm_set_private_data(struct cm_id_private *cm_id_priv, - void *private_data, u8 private_data_len) -{ - if (cm_id_priv->private_data && cm_id_priv->private_data_len) - kfree(cm_id_priv->private_data); - - cm_id_priv->private_data = private_data; - cm_id_priv->private_data_len = private_data_len; -} - -static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, - struct ib_grh *grh, struct cm_av *av) -{ - av->port = port; - av->pkey_index = wc->pkey_index; - ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc, - grh, &av->ah_attr); -} - -static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av, - struct cm_id_private *cm_id_priv) -{ - struct cm_device *cm_dev; - struct cm_port *port = NULL; - unsigned long flags; - int ret; - u8 p; - struct net_device *ndev = ib_get_ndev_from_path(path); - - read_lock_irqsave(&cm.device_lock, flags); - list_for_each_entry(cm_dev, &cm.device_list, list) { - if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, - path->gid_type, ndev, &p, NULL)) { - port = cm_dev->port[p-1]; - break; - } - } - read_unlock_irqrestore(&cm.device_lock, flags); - - if (ndev) - dev_put(ndev); - - if (!port) - return -EINVAL; - - ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num, - be16_to_cpu(path->pkey), &av->pkey_index); - if (ret) - return ret; - - av->port = port; - ret = ib_init_ah_from_path(cm_dev->ib_device, port->port_num, - path, &av->ah_attr); - if (ret) - return ret; - - av->timeout = path->packet_life_time + 1; - - spin_lock_irqsave(&cm.lock, flags); - if (&cm_id_priv->av == av) - list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); - else if (&cm_id_priv->alt_av == av) - list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); - else - ret = -EINVAL; - - spin_unlock_irqrestore(&cm.lock, flags); - - return ret; -} - -static int cm_alloc_id(struct cm_id_private *cm_id_priv) -{ - unsigned long flags; - int id; - - idr_preload(GFP_KERNEL); - spin_lock_irqsave(&cm.lock, flags); - - id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT); - - spin_unlock_irqrestore(&cm.lock, flags); - idr_preload_end(); - - cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; - return id < 0 ? 
id : 0; -} - -static void cm_free_id(__be32 local_id) -{ - spin_lock_irq(&cm.lock); - idr_remove(&cm.local_id_table, - (__force int) (local_id ^ cm.random_id_operand)); - spin_unlock_irq(&cm.lock); -} - -static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) -{ - struct cm_id_private *cm_id_priv; - - cm_id_priv = idr_find(&cm.local_id_table, - (__force int) (local_id ^ cm.random_id_operand)); - if (cm_id_priv) { - if (cm_id_priv->id.remote_id == remote_id) - atomic_inc(&cm_id_priv->refcount); - else - cm_id_priv = NULL; - } - - return cm_id_priv; -} - -static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) -{ - struct cm_id_private *cm_id_priv; - - spin_lock_irq(&cm.lock); - cm_id_priv = cm_get_id(local_id, remote_id); - spin_unlock_irq(&cm.lock); - - return cm_id_priv; -} - -/* - * Trivial helpers to strip endian annotation and compare; the - * endianness doesn't actually matter since we just need a stable - * order for the RB tree. - */ -static int be32_lt(__be32 a, __be32 b) -{ - return (__force u32) a < (__force u32) b; -} - -static int be32_gt(__be32 a, __be32 b) -{ - return (__force u32) a > (__force u32) b; -} - -static int be64_lt(__be64 a, __be64 b) -{ - return (__force u64) a < (__force u64) b; -} - -static int be64_gt(__be64 a, __be64 b) -{ - return (__force u64) a > (__force u64) b; -} - -static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) -{ - struct rb_node **link = &cm.listen_service_table.rb_node; - struct rb_node *parent = NULL; - struct cm_id_private *cur_cm_id_priv; - __be64 service_id = cm_id_priv->id.service_id; - __be64 service_mask = cm_id_priv->id.service_mask; - - while (*link) { - parent = *link; - cur_cm_id_priv = rb_entry(parent, struct cm_id_private, - service_node); - if ((cur_cm_id_priv->id.service_mask & service_id) == - (service_mask & cur_cm_id_priv->id.service_id) && - (cm_id_priv->id.device == cur_cm_id_priv->id.device)) - return cur_cm_id_priv; - - if (cm_id_priv->id.device < cur_cm_id_priv->id.device) - link = &(*link)->rb_left; - else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) - link = &(*link)->rb_right; - else if (be64_lt(service_id, cur_cm_id_priv->id.service_id)) - link = &(*link)->rb_left; - else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) - link = &(*link)->rb_right; - else - link = &(*link)->rb_right; - } - rb_link_node(&cm_id_priv->service_node, parent, link); - rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table); - return NULL; -} - -static struct cm_id_private * cm_find_listen(struct ib_device *device, - __be64 service_id) -{ - struct rb_node *node = cm.listen_service_table.rb_node; - struct cm_id_private *cm_id_priv; - - while (node) { - cm_id_priv = rb_entry(node, struct cm_id_private, service_node); - if ((cm_id_priv->id.service_mask & service_id) == - cm_id_priv->id.service_id && - (cm_id_priv->id.device == device)) - return cm_id_priv; - - if (device < cm_id_priv->id.device) - node = node->rb_left; - else if (device > cm_id_priv->id.device) - node = node->rb_right; - else if (be64_lt(service_id, cm_id_priv->id.service_id)) - node = node->rb_left; - else if (be64_gt(service_id, cm_id_priv->id.service_id)) - node = node->rb_right; - else - node = node->rb_right; - } - return NULL; -} - -static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info - *timewait_info) -{ - struct rb_node **link = &cm.remote_id_table.rb_node; - struct rb_node *parent = NULL; - struct cm_timewait_info *cur_timewait_info; - __be64 
remote_ca_guid = timewait_info->remote_ca_guid; - __be32 remote_id = timewait_info->work.remote_id; - - while (*link) { - parent = *link; - cur_timewait_info = rb_entry(parent, struct cm_timewait_info, - remote_id_node); - if (be32_lt(remote_id, cur_timewait_info->work.remote_id)) - link = &(*link)->rb_left; - else if (be32_gt(remote_id, cur_timewait_info->work.remote_id)) - link = &(*link)->rb_right; - else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) - link = &(*link)->rb_left; - else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) - link = &(*link)->rb_right; - else - return cur_timewait_info; - } - timewait_info->inserted_remote_id = 1; - rb_link_node(&timewait_info->remote_id_node, parent, link); - rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table); - return NULL; -} - -static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid, - __be32 remote_id) -{ - struct rb_node *node = cm.remote_id_table.rb_node; - struct cm_timewait_info *timewait_info; - - while (node) { - timewait_info = rb_entry(node, struct cm_timewait_info, - remote_id_node); - if (be32_lt(remote_id, timewait_info->work.remote_id)) - node = node->rb_left; - else if (be32_gt(remote_id, timewait_info->work.remote_id)) - node = node->rb_right; - else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid)) - node = node->rb_left; - else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid)) - node = node->rb_right; - else - return timewait_info; - } - return NULL; -} - -static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info - *timewait_info) -{ - struct rb_node **link = &cm.remote_qp_table.rb_node; - struct rb_node *parent = NULL; - struct cm_timewait_info *cur_timewait_info; - __be64 remote_ca_guid = timewait_info->remote_ca_guid; - __be32 remote_qpn = timewait_info->remote_qpn; - - while (*link) { - parent = *link; - cur_timewait_info = rb_entry(parent, struct cm_timewait_info, - remote_qp_node); - if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn)) - link = &(*link)->rb_left; - else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn)) - link = &(*link)->rb_right; - else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) - link = &(*link)->rb_left; - else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) - link = &(*link)->rb_right; - else - return cur_timewait_info; - } - timewait_info->inserted_remote_qp = 1; - rb_link_node(&timewait_info->remote_qp_node, parent, link); - rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table); - return NULL; -} - -static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private - *cm_id_priv) -{ - struct rb_node **link = &cm.remote_sidr_table.rb_node; - struct rb_node *parent = NULL; - struct cm_id_private *cur_cm_id_priv; - union ib_gid *port_gid = &cm_id_priv->av.dgid; - __be32 remote_id = cm_id_priv->id.remote_id; - - while (*link) { - parent = *link; - cur_cm_id_priv = rb_entry(parent, struct cm_id_private, - sidr_id_node); - if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id)) - link = &(*link)->rb_left; - else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id)) - link = &(*link)->rb_right; - else { - int cmp; - cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid, - sizeof *port_gid); - if (cmp < 0) - link = &(*link)->rb_left; - else if (cmp > 0) - link = &(*link)->rb_right; - else - return cur_cm_id_priv; - } - } - rb_link_node(&cm_id_priv->sidr_id_node, parent, link); - rb_insert_color(&cm_id_priv->sidr_id_node, 
&cm.remote_sidr_table);
- return NULL;
-}
-
-static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
- enum ib_cm_sidr_status status)
-{
- struct ib_cm_sidr_rep_param param;
-
- memset(&param, 0, sizeof param);
- param.status = status;
- ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
-}
-
-struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
- ib_cm_handler cm_handler,
- void *context)
-{
- struct cm_id_private *cm_id_priv;
- int ret;
-
- cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
- if (!cm_id_priv)
- return ERR_PTR(-ENOMEM);
-
- cm_id_priv->id.state = IB_CM_IDLE;
- cm_id_priv->id.device = device;
- cm_id_priv->id.cm_handler = cm_handler;
- cm_id_priv->id.context = context;
- cm_id_priv->id.remote_cm_qpn = 1;
- ret = cm_alloc_id(cm_id_priv);
- if (ret)
- goto error;
-
- spin_lock_init(&cm_id_priv->lock);
- init_completion(&cm_id_priv->comp);
- INIT_LIST_HEAD(&cm_id_priv->work_list);
- INIT_LIST_HEAD(&cm_id_priv->prim_list);
- INIT_LIST_HEAD(&cm_id_priv->altr_list);
- atomic_set(&cm_id_priv->work_count, -1);
- atomic_set(&cm_id_priv->refcount, 1);
- return &cm_id_priv->id;
-
-error:
- kfree(cm_id_priv);
- return ERR_PTR(-ENOMEM);
-}
-EXPORT_SYMBOL(ib_create_cm_id);
-
-static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
-{
- struct cm_work *work;
-
- if (list_empty(&cm_id_priv->work_list))
- return NULL;
-
- work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
- list_del(&work->list);
- return work;
-}
-
-static void cm_free_work(struct cm_work *work)
-{
- if (work->mad_recv_wc)
- ib_free_recv_mad(work->mad_recv_wc);
- kfree(work);
-}
-
-static inline int cm_convert_to_ms(int iba_time)
-{
- /* approximate conversion to ms from 4.096us x 2^iba_time */
- return 1 << max(iba_time - 8, 0);
-}
-
-/*
- * calculate: 4.096us x 2^ack_timeout = 4.096us x 2^ack_delay + 2 x 4.096us x 2^life_time
- * Because of how ack_timeout is stored, adding one doubles the timeout.
- * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
- * increment it (round up) only if the other is within 50%.
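- * Illustrative example: ca_ack_delay = 16 and packet_life_time = 15 give
- * ack_timeout = 16; since the other value (16) is within 50%, it is
- * rounded up to 17, i.e. 4.096us x 2^17 (~537 ms), which
- * cm_convert_to_ms() above approximates as 1 << (17 - 8) = 512 ms.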
- */ -static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time) -{ - int ack_timeout = packet_life_time + 1; - - if (ack_timeout >= ca_ack_delay) - ack_timeout += (ca_ack_delay >= (ack_timeout - 1)); - else - ack_timeout = ca_ack_delay + - (ack_timeout >= (ca_ack_delay - 1)); - - return min(31, ack_timeout); -} - -static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) -{ - if (timewait_info->inserted_remote_id) { - rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); - timewait_info->inserted_remote_id = 0; - } - - if (timewait_info->inserted_remote_qp) { - rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); - timewait_info->inserted_remote_qp = 0; - } -} - -static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) -{ - struct cm_timewait_info *timewait_info; - - timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL); - if (!timewait_info) - return ERR_PTR(-ENOMEM); - - timewait_info->work.local_id = local_id; - INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler); - timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT; - return timewait_info; -} - -static void cm_enter_timewait(struct cm_id_private *cm_id_priv) -{ - int wait_time; - unsigned long flags; - struct cm_device *cm_dev; - - cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client); - if (!cm_dev) - return; - - spin_lock_irqsave(&cm.lock, flags); - cm_cleanup_timewait(cm_id_priv->timewait_info); - list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list); - spin_unlock_irqrestore(&cm.lock, flags); - - /* - * The cm_id could be destroyed by the user before we exit timewait. - * To protect against this, we search for the cm_id after exiting - * timewait before notifying the user that we've exited timewait. - */ - cm_id_priv->id.state = IB_CM_TIMEWAIT; - wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); - - /* Check if the device started its remove_one */ - spin_lock_irqsave(&cm.lock, flags); - if (!cm_dev->going_down) - queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, - msecs_to_jiffies(wait_time)); - spin_unlock_irqrestore(&cm.lock, flags); - - cm_id_priv->timewait_info = NULL; -} - -static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) -{ - unsigned long flags; - - cm_id_priv->id.state = IB_CM_IDLE; - if (cm_id_priv->timewait_info) { - spin_lock_irqsave(&cm.lock, flags); - cm_cleanup_timewait(cm_id_priv->timewait_info); - spin_unlock_irqrestore(&cm.lock, flags); - kfree(cm_id_priv->timewait_info); - cm_id_priv->timewait_info = NULL; - } -} - -static void cm_destroy_id(struct ib_cm_id *cm_id, int err) -{ - struct cm_id_private *cm_id_priv; - struct cm_work *work; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); -retest: - spin_lock_irq(&cm_id_priv->lock); - switch (cm_id->state) { - case IB_CM_LISTEN: - spin_unlock_irq(&cm_id_priv->lock); - - spin_lock_irq(&cm.lock); - if (--cm_id_priv->listen_sharecount > 0) { - /* The id is still shared. 
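- * Drop only this caller's reference; the listener is torn
- * down when the last sharer destroys it.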
*/
- cm_deref_id(cm_id_priv);
- spin_unlock_irq(&cm.lock);
- return;
- }
- rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
- spin_unlock_irq(&cm.lock);
- break;
- case IB_CM_SIDR_REQ_SENT:
- cm_id->state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- spin_unlock_irq(&cm_id_priv->lock);
- break;
- case IB_CM_SIDR_REQ_RCVD:
- spin_unlock_irq(&cm_id_priv->lock);
- cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
- spin_lock_irq(&cm.lock);
- if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
- rb_erase(&cm_id_priv->sidr_id_node,
- &cm.remote_sidr_table);
- spin_unlock_irq(&cm.lock);
- break;
- case IB_CM_REQ_SENT:
- case IB_CM_MRA_REQ_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
- &cm_id_priv->id.device->node_guid,
- sizeof cm_id_priv->id.device->node_guid,
- NULL, 0);
- break;
- case IB_CM_REQ_RCVD:
- if (err == -ENOMEM) {
- /* Do not reject; this allows future retries. */
- cm_reset_to_idle(cm_id_priv);
- spin_unlock_irq(&cm_id_priv->lock);
- } else {
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
- NULL, 0, NULL, 0);
- }
- break;
- case IB_CM_REP_SENT:
- case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* Fall through */
- case IB_CM_MRA_REQ_SENT:
- case IB_CM_REP_RCVD:
- case IB_CM_MRA_REP_SENT:
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
- NULL, 0, NULL, 0);
- break;
- case IB_CM_ESTABLISHED:
- spin_unlock_irq(&cm_id_priv->lock);
- if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
- break;
- ib_send_cm_dreq(cm_id, NULL, 0);
- goto retest;
- case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- cm_enter_timewait(cm_id_priv);
- spin_unlock_irq(&cm_id_priv->lock);
- break;
- case IB_CM_DREQ_RCVD:
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_drep(cm_id, NULL, 0);
- break;
- default:
- spin_unlock_irq(&cm_id_priv->lock);
- break;
- }
-
- spin_lock_irq(&cm.lock);
- if (!list_empty(&cm_id_priv->altr_list) &&
- (!cm_id_priv->altr_send_port_not_ready))
- list_del(&cm_id_priv->altr_list);
- if (!list_empty(&cm_id_priv->prim_list) &&
- (!cm_id_priv->prim_send_port_not_ready))
- list_del(&cm_id_priv->prim_list);
- spin_unlock_irq(&cm.lock);
-
- cm_free_id(cm_id->local_id);
- cm_deref_id(cm_id_priv);
- wait_for_completion(&cm_id_priv->comp);
- while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
- cm_free_work(work);
- kfree(cm_id_priv->private_data);
- kfree(cm_id_priv);
-}
-
-void ib_destroy_cm_id(struct ib_cm_id *cm_id)
-{
- cm_destroy_id(cm_id, 0);
-}
-EXPORT_SYMBOL(ib_destroy_cm_id);
-
-/**
- * __ib_cm_listen - Initiates listening on the specified service ID for
- * connection and service ID resolution requests.
- * @cm_id: Connection identifier associated with the listen request.
- * @service_id: Service identifier matched against incoming connection
- * and service ID resolution requests. The service ID should be specified
- * in network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
- * assign a service ID to the caller.
- * @service_mask: Mask applied to the service ID, used to listen across a
- * range of service IDs. If set to 0, the service ID is matched
- * exactly. This parameter is ignored if %service_id is set to
- * IB_CM_ASSIGN_SERVICE_ID.
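- *
- * Both callers, ib_cm_listen() and ib_cm_insert_listen(), invoke this
- * with cm.lock held.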
- */
-static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
- __be64 service_mask)
-{
- struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
- int ret = 0;
-
- service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
- service_id &= service_mask;
- if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
- (service_id != IB_CM_ASSIGN_SERVICE_ID))
- return -EINVAL;
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- if (cm_id->state != IB_CM_IDLE)
- return -EINVAL;
-
- cm_id->state = IB_CM_LISTEN;
- ++cm_id_priv->listen_sharecount;
-
- if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
- cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
- cm_id->service_mask = ~cpu_to_be64(0);
- } else {
- cm_id->service_id = service_id;
- cm_id->service_mask = service_mask;
- }
- cur_cm_id_priv = cm_insert_listen(cm_id_priv);
-
- if (cur_cm_id_priv) {
- cm_id->state = IB_CM_IDLE;
- --cm_id_priv->listen_sharecount;
- ret = -EBUSY;
- }
- return ret;
-}
-
-int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&cm.lock, flags);
- ret = __ib_cm_listen(cm_id, service_id, service_mask);
- spin_unlock_irqrestore(&cm.lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL(ib_cm_listen);
-
-/**
- * Create a new listening ib_cm_id and listen on the given service ID.
- *
- * If there's an existing ID listening on that same device and service ID,
- * return it.
- *
- * @device: Device associated with the cm_id. All related communication will
- * be associated with the specified device.
- * @cm_handler: Callback invoked to notify the user of CM events.
- * @service_id: Service identifier matched against incoming connection
- * and service ID resolution requests. The service ID should be specified
- * in network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
- * assign a service ID to the caller.
- *
- * Callers should call ib_destroy_cm_id when done with the listener ID.
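- *
- * A minimal usage sketch (my_handler and the service ID value are
- * illustrative, not part of this file):
- *
- *	struct ib_cm_id *id;
- *
- *	id = ib_cm_insert_listen(device, my_handler, cpu_to_be64(0x10ULL));
- *	if (IS_ERR(id))
- *		return PTR_ERR(id);
- *	...
- *	ib_destroy_cm_id(id);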
- */ -struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, - ib_cm_handler cm_handler, - __be64 service_id) -{ - struct cm_id_private *cm_id_priv; - struct ib_cm_id *cm_id; - unsigned long flags; - int err = 0; - - /* Create an ID in advance, since the creation may sleep */ - cm_id = ib_create_cm_id(device, cm_handler, NULL); - if (IS_ERR(cm_id)) - return cm_id; - - spin_lock_irqsave(&cm.lock, flags); - - if (service_id == IB_CM_ASSIGN_SERVICE_ID) - goto new_id; - - /* Find an existing ID */ - cm_id_priv = cm_find_listen(device, service_id); - if (cm_id_priv) { - if (cm_id->cm_handler != cm_handler || cm_id->context) { - /* Sharing an ib_cm_id with different handlers is not - * supported */ - spin_unlock_irqrestore(&cm.lock, flags); - return ERR_PTR(-EINVAL); - } - atomic_inc(&cm_id_priv->refcount); - ++cm_id_priv->listen_sharecount; - spin_unlock_irqrestore(&cm.lock, flags); - - ib_destroy_cm_id(cm_id); - cm_id = &cm_id_priv->id; - return cm_id; - } - -new_id: - /* Use newly created ID */ - err = __ib_cm_listen(cm_id, service_id, 0); - - spin_unlock_irqrestore(&cm.lock, flags); - - if (err) { - ib_destroy_cm_id(cm_id); - return ERR_PTR(err); - } - return cm_id; -} -EXPORT_SYMBOL(ib_cm_insert_listen); - -static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, - enum cm_msg_sequence msg_seq) -{ - u64 hi_tid, low_tid; - - hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; - low_tid = (u64) ((__force u32)cm_id_priv->id.local_id | - (msg_seq << 30)); - return cpu_to_be64(hi_tid | low_tid); -} - -static void cm_format_mad_hdr(struct ib_mad_hdr *hdr, - __be16 attr_id, __be64 tid) -{ - hdr->base_version = IB_MGMT_BASE_VERSION; - hdr->mgmt_class = IB_MGMT_CLASS_CM; - hdr->class_version = IB_CM_CLASS_VERSION; - hdr->method = IB_MGMT_METHOD_SEND; - hdr->attr_id = attr_id; - hdr->tid = tid; -} - -static void cm_format_req(struct cm_req_msg *req_msg, - struct cm_id_private *cm_id_priv, - struct ib_cm_req_param *param) -{ - struct ib_sa_path_rec *pri_path = param->primary_path; - struct ib_sa_path_rec *alt_path = param->alternate_path; - - cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); - - req_msg->local_comm_id = cm_id_priv->id.local_id; - req_msg->service_id = param->service_id; - req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; - cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); - cm_req_set_init_depth(req_msg, param->initiator_depth); - cm_req_set_remote_resp_timeout(req_msg, - param->remote_cm_response_timeout); - cm_req_set_qp_type(req_msg, param->qp_type); - cm_req_set_flow_ctrl(req_msg, param->flow_control); - cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); - cm_req_set_local_resp_timeout(req_msg, - param->local_cm_response_timeout); - req_msg->pkey = param->primary_path->pkey; - cm_req_set_path_mtu(req_msg, param->primary_path->mtu); - cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); - - if (param->qp_type != IB_QPT_XRC_INI) { - cm_req_set_resp_res(req_msg, param->responder_resources); - cm_req_set_retry_count(req_msg, param->retry_count); - cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); - cm_req_set_srq(req_msg, param->srq); - } - - if (pri_path->hop_limit <= 1) { - req_msg->primary_local_lid = pri_path->slid; - req_msg->primary_remote_lid = pri_path->dlid; - } else { - /* Work-around until there's a way to obtain remote LID info */ - req_msg->primary_local_lid = IB_LID_PERMISSIVE; - req_msg->primary_remote_lid = IB_LID_PERMISSIVE; - } - 
req_msg->primary_local_gid = pri_path->sgid; - req_msg->primary_remote_gid = pri_path->dgid; - cm_req_set_primary_flow_label(req_msg, pri_path->flow_label); - cm_req_set_primary_packet_rate(req_msg, pri_path->rate); - req_msg->primary_traffic_class = pri_path->traffic_class; - req_msg->primary_hop_limit = pri_path->hop_limit; - cm_req_set_primary_sl(req_msg, pri_path->sl); - cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1)); - cm_req_set_primary_local_ack_timeout(req_msg, - cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, - pri_path->packet_life_time)); - - if (alt_path) { - if (alt_path->hop_limit <= 1) { - req_msg->alt_local_lid = alt_path->slid; - req_msg->alt_remote_lid = alt_path->dlid; - } else { - req_msg->alt_local_lid = IB_LID_PERMISSIVE; - req_msg->alt_remote_lid = IB_LID_PERMISSIVE; - } - req_msg->alt_local_gid = alt_path->sgid; - req_msg->alt_remote_gid = alt_path->dgid; - cm_req_set_alt_flow_label(req_msg, - alt_path->flow_label); - cm_req_set_alt_packet_rate(req_msg, alt_path->rate); - req_msg->alt_traffic_class = alt_path->traffic_class; - req_msg->alt_hop_limit = alt_path->hop_limit; - cm_req_set_alt_sl(req_msg, alt_path->sl); - cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1)); - cm_req_set_alt_local_ack_timeout(req_msg, - cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, - alt_path->packet_life_time)); - } - - if (param->private_data && param->private_data_len) - memcpy(req_msg->private_data, param->private_data, - param->private_data_len); -} - -static int cm_validate_req_param(struct ib_cm_req_param *param) -{ - /* peer-to-peer not supported */ - if (param->peer_to_peer) - return -EINVAL; - - if (!param->primary_path) - return -EINVAL; - - if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC && - param->qp_type != IB_QPT_XRC_INI) - return -EINVAL; - - if (param->private_data && - param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE) - return -EINVAL; - - if (param->alternate_path && - (param->alternate_path->pkey != param->primary_path->pkey || - param->alternate_path->mtu != param->primary_path->mtu)) - return -EINVAL; - - return 0; -} - -int ib_send_cm_req(struct ib_cm_id *cm_id, - struct ib_cm_req_param *param) -{ - struct cm_id_private *cm_id_priv; - struct cm_req_msg *req_msg; - unsigned long flags; - int ret; - - ret = cm_validate_req_param(param); - if (ret) - return ret; - - /* Verify that we're not in timewait. 
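- * The IB_CM_IDLE check below covers this: an id still sitting in
- * timewait is in IB_CM_TIMEWAIT and is rejected with -EINVAL.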
*/ - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_IDLE) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = -EINVAL; - goto out; - } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - - cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> - id.local_id); - if (IS_ERR(cm_id_priv->timewait_info)) { - ret = PTR_ERR(cm_id_priv->timewait_info); - goto out; - } - - ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, - cm_id_priv); - if (ret) - goto error1; - if (param->alternate_path) { - ret = cm_init_av_by_path(param->alternate_path, - &cm_id_priv->alt_av, cm_id_priv); - if (ret) - goto error1; - } - cm_id->service_id = param->service_id; - cm_id->service_mask = ~cpu_to_be64(0); - cm_id_priv->timeout_ms = cm_convert_to_ms( - param->primary_path->packet_life_time) * 2 + - cm_convert_to_ms( - param->remote_cm_response_timeout); - cm_id_priv->max_cm_retries = param->max_cm_retries; - cm_id_priv->initiator_depth = param->initiator_depth; - cm_id_priv->responder_resources = param->responder_resources; - cm_id_priv->retry_count = param->retry_count; - cm_id_priv->path_mtu = param->primary_path->mtu; - cm_id_priv->pkey = param->primary_path->pkey; - cm_id_priv->qp_type = param->qp_type; - - ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); - if (ret) - goto error1; - - req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; - cm_format_req(req_msg, cm_id_priv, param); - cm_id_priv->tid = req_msg->hdr.tid; - cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; - cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; - - cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); - cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg); - - spin_lock_irqsave(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - goto error2; - } - BUG_ON(cm_id->state != IB_CM_IDLE); - cm_id->state = IB_CM_REQ_SENT; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return 0; - -error2: cm_free_msg(cm_id_priv->msg); -error1: kfree(cm_id_priv->timewait_info); -out: return ret; -} -EXPORT_SYMBOL(ib_send_cm_req); - -static int cm_issue_rej(struct cm_port *port, - struct ib_mad_recv_wc *mad_recv_wc, - enum ib_cm_rej_reason reason, - enum cm_msg_response msg_rejected, - void *ari, u8 ari_length) -{ - struct ib_mad_send_buf *msg = NULL; - struct cm_rej_msg *rej_msg, *rcv_msg; - int ret; - - ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); - if (ret) - return ret; - - /* We just need common CM header information. Cast to any message. 
*/ - rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad; - rej_msg = (struct cm_rej_msg *) msg->mad; - - cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid); - rej_msg->remote_comm_id = rcv_msg->local_comm_id; - rej_msg->local_comm_id = rcv_msg->remote_comm_id; - cm_rej_set_msg_rejected(rej_msg, msg_rejected); - rej_msg->reason = cpu_to_be16(reason); - - if (ari && ari_length) { - cm_rej_set_reject_info_len(rej_msg, ari_length); - memcpy(rej_msg->ari, ari, ari_length); - } - - ret = ib_post_send_mad(msg, NULL); - if (ret) - cm_free_msg(msg); - - return ret; -} - -static void cm_format_paths_from_req(struct cm_req_msg *req_msg, - struct ib_sa_path_rec *primary_path, - struct ib_sa_path_rec *alt_path) -{ - memset(primary_path, 0, sizeof *primary_path); - primary_path->dgid = req_msg->primary_local_gid; - primary_path->sgid = req_msg->primary_remote_gid; - primary_path->dlid = req_msg->primary_local_lid; - primary_path->slid = req_msg->primary_remote_lid; - primary_path->flow_label = cm_req_get_primary_flow_label(req_msg); - primary_path->hop_limit = req_msg->primary_hop_limit; - primary_path->traffic_class = req_msg->primary_traffic_class; - primary_path->reversible = 1; - primary_path->pkey = req_msg->pkey; - primary_path->sl = cm_req_get_primary_sl(req_msg); - primary_path->mtu_selector = IB_SA_EQ; - primary_path->mtu = cm_req_get_path_mtu(req_msg); - primary_path->rate_selector = IB_SA_EQ; - primary_path->rate = cm_req_get_primary_packet_rate(req_msg); - primary_path->packet_life_time_selector = IB_SA_EQ; - primary_path->packet_life_time = - cm_req_get_primary_local_ack_timeout(req_msg); - primary_path->packet_life_time -= (primary_path->packet_life_time > 0); - primary_path->service_id = req_msg->service_id; - - if (req_msg->alt_local_lid) { - memset(alt_path, 0, sizeof *alt_path); - alt_path->dgid = req_msg->alt_local_gid; - alt_path->sgid = req_msg->alt_remote_gid; - alt_path->dlid = req_msg->alt_local_lid; - alt_path->slid = req_msg->alt_remote_lid; - alt_path->flow_label = cm_req_get_alt_flow_label(req_msg); - alt_path->hop_limit = req_msg->alt_hop_limit; - alt_path->traffic_class = req_msg->alt_traffic_class; - alt_path->reversible = 1; - alt_path->pkey = req_msg->pkey; - alt_path->sl = cm_req_get_alt_sl(req_msg); - alt_path->mtu_selector = IB_SA_EQ; - alt_path->mtu = cm_req_get_path_mtu(req_msg); - alt_path->rate_selector = IB_SA_EQ; - alt_path->rate = cm_req_get_alt_packet_rate(req_msg); - alt_path->packet_life_time_selector = IB_SA_EQ; - alt_path->packet_life_time = - cm_req_get_alt_local_ack_timeout(req_msg); - alt_path->packet_life_time -= (alt_path->packet_life_time > 0); - alt_path->service_id = req_msg->service_id; - } -} - -static u16 cm_get_bth_pkey(struct cm_work *work) -{ - struct ib_device *ib_dev = work->port->cm_dev->ib_device; - u8 port_num = work->port->port_num; - u16 pkey_index = work->mad_recv_wc->wc->pkey_index; - u16 pkey; - int ret; - - ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey); - if (ret) { - dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). 
%d\n", - port_num, pkey_index, ret); - return 0; - } - - return pkey; -} - -static void cm_format_req_event(struct cm_work *work, - struct cm_id_private *cm_id_priv, - struct ib_cm_id *listen_id) -{ - struct cm_req_msg *req_msg; - struct ib_cm_req_event_param *param; - - req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; - param = &work->cm_event.param.req_rcvd; - param->listen_id = listen_id; - param->bth_pkey = cm_get_bth_pkey(work); - param->port = cm_id_priv->av.port->port_num; - param->primary_path = &work->path[0]; - if (req_msg->alt_local_lid) - param->alternate_path = &work->path[1]; - else - param->alternate_path = NULL; - param->remote_ca_guid = req_msg->local_ca_guid; - param->remote_qkey = be32_to_cpu(req_msg->local_qkey); - param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg)); - param->qp_type = cm_req_get_qp_type(req_msg); - param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg)); - param->responder_resources = cm_req_get_init_depth(req_msg); - param->initiator_depth = cm_req_get_resp_res(req_msg); - param->local_cm_response_timeout = - cm_req_get_remote_resp_timeout(req_msg); - param->flow_control = cm_req_get_flow_ctrl(req_msg); - param->remote_cm_response_timeout = - cm_req_get_local_resp_timeout(req_msg); - param->retry_count = cm_req_get_retry_count(req_msg); - param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); - param->srq = cm_req_get_srq(req_msg); - work->cm_event.private_data = &req_msg->private_data; -} - -static void cm_process_work(struct cm_id_private *cm_id_priv, - struct cm_work *work) -{ - int ret; - - /* We will typically only have the current event to report. */ - ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); - cm_free_work(work); - - while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) { - spin_lock_irq(&cm_id_priv->lock); - work = cm_dequeue_work(cm_id_priv); - spin_unlock_irq(&cm_id_priv->lock); - BUG_ON(!work); - ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, - &work->cm_event); - cm_free_work(work); - } - cm_deref_id(cm_id_priv); - if (ret) - cm_destroy_id(&cm_id_priv->id, ret); -} - -static void cm_format_mra(struct cm_mra_msg *mra_msg, - struct cm_id_private *cm_id_priv, - enum cm_msg_response msg_mraed, u8 service_timeout, - const void *private_data, u8 private_data_len) -{ - cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid); - cm_mra_set_msg_mraed(mra_msg, msg_mraed); - mra_msg->local_comm_id = cm_id_priv->id.local_id; - mra_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_mra_set_service_timeout(mra_msg, service_timeout); - - if (private_data && private_data_len) - memcpy(mra_msg->private_data, private_data, private_data_len); -} - -static void cm_format_rej(struct cm_rej_msg *rej_msg, - struct cm_id_private *cm_id_priv, - enum ib_cm_rej_reason reason, - void *ari, - u8 ari_length, - const void *private_data, - u8 private_data_len) -{ - cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid); - rej_msg->remote_comm_id = cm_id_priv->id.remote_id; - - switch(cm_id_priv->id.state) { - case IB_CM_REQ_RCVD: - rej_msg->local_comm_id = 0; - cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); - break; - case IB_CM_MRA_REQ_SENT: - rej_msg->local_comm_id = cm_id_priv->id.local_id; - cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); - break; - case IB_CM_REP_RCVD: - case IB_CM_MRA_REP_SENT: - rej_msg->local_comm_id = cm_id_priv->id.local_id; - cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP); - break; - default: - rej_msg->local_comm_id 
= cm_id_priv->id.local_id; - cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER); - break; - } - - rej_msg->reason = cpu_to_be16(reason); - if (ari && ari_length) { - cm_rej_set_reject_info_len(rej_msg, ari_length); - memcpy(rej_msg->ari, ari, ari_length); - } - - if (private_data && private_data_len) - memcpy(rej_msg->private_data, private_data, private_data_len); -} - -static void cm_dup_req_handler(struct cm_work *work, - struct cm_id_private *cm_id_priv) -{ - struct ib_mad_send_buf *msg = NULL; - int ret; - - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_REQ_COUNTER]); - - /* Quick state check to discard duplicate REQs. */ - if (cm_id_priv->id.state == IB_CM_REQ_RCVD) - return; - - ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); - if (ret) - return; - - spin_lock_irq(&cm_id_priv->lock); - switch (cm_id_priv->id.state) { - case IB_CM_MRA_REQ_SENT: - cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, - CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout, - cm_id_priv->private_data, - cm_id_priv->private_data_len); - break; - case IB_CM_TIMEWAIT: - cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, - IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0); - break; - default: - goto unlock; - } - spin_unlock_irq(&cm_id_priv->lock); - - ret = ib_post_send_mad(msg, NULL); - if (ret) - goto free; - return; - -unlock: spin_unlock_irq(&cm_id_priv->lock); -free: cm_free_msg(msg); -} - -static struct cm_id_private * cm_match_req(struct cm_work *work, - struct cm_id_private *cm_id_priv) -{ - struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; - struct cm_timewait_info *timewait_info; - struct cm_req_msg *req_msg; - - req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; - - /* Check for possible duplicate REQ. */ - spin_lock_irq(&cm.lock); - timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); - if (timewait_info) { - cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, - timewait_info->work.remote_id); - spin_unlock_irq(&cm.lock); - if (cur_cm_id_priv) { - cm_dup_req_handler(work, cur_cm_id_priv); - cm_deref_id(cur_cm_id_priv); - } - return NULL; - } - - /* Check for stale connections. */ - timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); - if (timewait_info) { - cm_cleanup_timewait(cm_id_priv->timewait_info); - spin_unlock_irq(&cm.lock); - cm_issue_rej(work->port, work->mad_recv_wc, - IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, - NULL, 0); - return NULL; - } - - /* Find matching listen request. */ - listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, - req_msg->service_id); - if (!listen_cm_id_priv) { - cm_cleanup_timewait(cm_id_priv->timewait_info); - spin_unlock_irq(&cm.lock); - cm_issue_rej(work->port, work->mad_recv_wc, - IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, - NULL, 0); - goto out; - } - atomic_inc(&listen_cm_id_priv->refcount); - atomic_inc(&cm_id_priv->refcount); - cm_id_priv->id.state = IB_CM_REQ_RCVD; - atomic_inc(&cm_id_priv->work_count); - spin_unlock_irq(&cm.lock); -out: - return listen_cm_id_priv; -} - -/* - * Work-around for inter-subnet connections. If the LIDs are permissive, - * we need to override the LID/SL data in the REQ with the LID information - * in the work completion. 
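- * (For routed packets, wc->slid is presumably the LID of the last-hop
- * router on the local subnet, which is what any reply must target.)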
- */ -static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) -{ - if (!cm_req_get_primary_subnet_local(req_msg)) { - if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { - req_msg->primary_local_lid = cpu_to_be16(wc->slid); - cm_req_set_primary_sl(req_msg, wc->sl); - } - - if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE) - req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits); - } - - if (!cm_req_get_alt_subnet_local(req_msg)) { - if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { - req_msg->alt_local_lid = cpu_to_be16(wc->slid); - cm_req_set_alt_sl(req_msg, wc->sl); - } - - if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE) - req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits); - } -} - -static int cm_req_handler(struct cm_work *work) -{ - struct ib_cm_id *cm_id; - struct cm_id_private *cm_id_priv, *listen_cm_id_priv; - struct cm_req_msg *req_msg; - union ib_gid gid; - struct ib_gid_attr gid_attr; - int ret; - - req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; - - cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); - if (IS_ERR(cm_id)) - return PTR_ERR(cm_id); - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - cm_id_priv->id.remote_id = req_msg->local_comm_id; - cm_init_av_for_response(work->port, work->mad_recv_wc->wc, - work->mad_recv_wc->recv_buf.grh, - &cm_id_priv->av); - cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> - id.local_id); - if (IS_ERR(cm_id_priv->timewait_info)) { - ret = PTR_ERR(cm_id_priv->timewait_info); - goto destroy; - } - cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; - cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; - cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg); - - listen_cm_id_priv = cm_match_req(work, cm_id_priv); - if (!listen_cm_id_priv) { - ret = -EINVAL; - kfree(cm_id_priv->timewait_info); - goto destroy; - } - - cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; - cm_id_priv->id.context = listen_cm_id_priv->id.context; - cm_id_priv->id.service_id = req_msg->service_id; - cm_id_priv->id.service_mask = ~cpu_to_be64(0); - - cm_process_routed_req(req_msg, work->mad_recv_wc->wc); - cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); - - memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); - work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit; - ret = ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, - cm_id_priv->av.ah_attr.grh.sgid_index, - &gid, &gid_attr); - if (!ret) { - if (gid_attr.ndev) { - work->path[0].ifindex = gid_attr.ndev->if_index; - work->path[0].net = dev_net(gid_attr.ndev); - dev_put(gid_attr.ndev); - } - work->path[0].gid_type = gid_attr.gid_type; - ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, - cm_id_priv); - } - if (ret) { - int err = ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, 0, - &work->path[0].sgid, - &gid_attr); - if (!err && gid_attr.ndev) { - work->path[0].ifindex = gid_attr.ndev->if_index; - work->path[0].net = dev_net(gid_attr.ndev); - dev_put(gid_attr.ndev); - } - work->path[0].gid_type = gid_attr.gid_type; - ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, - &work->path[0].sgid, sizeof work->path[0].sgid, - NULL, 0); - goto rejected; - } - if (req_msg->alt_local_lid) { - ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, - cm_id_priv); - if (ret) { - ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, - &work->path[0].sgid, - sizeof 
work->path[0].sgid, NULL, 0); - goto rejected; - } - } - cm_id_priv->tid = req_msg->hdr.tid; - cm_id_priv->timeout_ms = cm_convert_to_ms( - cm_req_get_local_resp_timeout(req_msg)); - cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg); - cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg); - cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); - cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg); - cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg); - cm_id_priv->pkey = req_msg->pkey; - cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg); - cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); - cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); - cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); - - cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); - cm_process_work(cm_id_priv, work); - cm_deref_id(listen_cm_id_priv); - return 0; - -rejected: - atomic_dec(&cm_id_priv->refcount); - cm_deref_id(listen_cm_id_priv); -destroy: - ib_destroy_cm_id(cm_id); - return ret; -} - -static void cm_format_rep(struct cm_rep_msg *rep_msg, - struct cm_id_private *cm_id_priv, - struct ib_cm_rep_param *param) -{ - cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); - rep_msg->local_comm_id = cm_id_priv->id.local_id; - rep_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); - rep_msg->resp_resources = param->responder_resources; - cm_rep_set_target_ack_delay(rep_msg, - cm_id_priv->av.port->cm_dev->ack_delay); - cm_rep_set_failover(rep_msg, param->failover_accepted); - cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); - rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; - - if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) { - rep_msg->initiator_depth = param->initiator_depth; - cm_rep_set_flow_ctrl(rep_msg, param->flow_control); - cm_rep_set_srq(rep_msg, param->srq); - cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); - } else { - cm_rep_set_srq(rep_msg, 1); - cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num)); - } - - if (param->private_data && param->private_data_len) - memcpy(rep_msg->private_data, param->private_data, - param->private_data_len); -} - -int ib_send_cm_rep(struct ib_cm_id *cm_id, - struct ib_cm_rep_param *param) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - struct cm_rep_msg *rep_msg; - unsigned long flags; - int ret; - - if (param->private_data && - param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_REQ_RCVD && - cm_id->state != IB_CM_MRA_REQ_SENT) { - ret = -EINVAL; - goto out; - } - - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - rep_msg = (struct cm_rep_msg *) msg->mad; - cm_format_rep(rep_msg, cm_id_priv, param); - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; - - ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } - - cm_id->state = IB_CM_REP_SENT; - cm_id_priv->msg = msg; - cm_id_priv->initiator_depth = param->initiator_depth; - cm_id_priv->responder_resources = param->responder_resources; - cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); - cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); - -out: spin_unlock_irqrestore(&cm_id_priv->lock, 
flags); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_rep); - -static void cm_format_rtu(struct cm_rtu_msg *rtu_msg, - struct cm_id_private *cm_id_priv, - const void *private_data, - u8 private_data_len) -{ - cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid); - rtu_msg->local_comm_id = cm_id_priv->id.local_id; - rtu_msg->remote_comm_id = cm_id_priv->id.remote_id; - - if (private_data && private_data_len) - memcpy(rtu_msg->private_data, private_data, private_data_len); -} - -int ib_send_cm_rtu(struct ib_cm_id *cm_id, - const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - void *data; - int ret; - - if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE) - return -EINVAL; - - data = cm_copy_private_data(private_data, private_data_len); - if (IS_ERR(data)) - return PTR_ERR(data); - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_REP_RCVD && - cm_id->state != IB_CM_MRA_REP_SENT) { - ret = -EINVAL; - goto error; - } - - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto error; - - cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, - private_data, private_data_len); - - ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - kfree(data); - return ret; - } - - cm_id->state = IB_CM_ESTABLISHED; - cm_set_private_data(cm_id_priv, data, private_data_len); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return 0; - -error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - kfree(data); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_rtu); - -static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type) -{ - struct cm_rep_msg *rep_msg; - struct ib_cm_rep_event_param *param; - - rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; - param = &work->cm_event.param.rep_rcvd; - param->remote_ca_guid = rep_msg->local_ca_guid; - param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); - param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type)); - param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); - param->responder_resources = rep_msg->initiator_depth; - param->initiator_depth = rep_msg->resp_resources; - param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); - param->failover_accepted = cm_rep_get_failover(rep_msg); - param->flow_control = cm_rep_get_flow_ctrl(rep_msg); - param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); - param->srq = cm_rep_get_srq(rep_msg); - work->cm_event.private_data = &rep_msg->private_data; -} - -static void cm_dup_rep_handler(struct cm_work *work) -{ - struct cm_id_private *cm_id_priv; - struct cm_rep_msg *rep_msg; - struct ib_mad_send_buf *msg = NULL; - int ret; - - rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, - rep_msg->local_comm_id); - if (!cm_id_priv) - return; - - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
- counter[CM_REP_COUNTER]); - ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); - if (ret) - goto deref; - - spin_lock_irq(&cm_id_priv->lock); - if (cm_id_priv->id.state == IB_CM_ESTABLISHED) - cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, - cm_id_priv->private_data, - cm_id_priv->private_data_len); - else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT) - cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, - CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout, - cm_id_priv->private_data, - cm_id_priv->private_data_len); - else - goto unlock; - spin_unlock_irq(&cm_id_priv->lock); - - ret = ib_post_send_mad(msg, NULL); - if (ret) - goto free; - goto deref; - -unlock: spin_unlock_irq(&cm_id_priv->lock); -free: cm_free_msg(msg); -deref: cm_deref_id(cm_id_priv); -} - -static int cm_rep_handler(struct cm_work *work) -{ - struct cm_id_private *cm_id_priv; - struct cm_rep_msg *rep_msg; - int ret; - - rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); - if (!cm_id_priv) { - cm_dup_rep_handler(work); - return -EINVAL; - } - - cm_format_rep_event(work, cm_id_priv->qp_type); - - spin_lock_irq(&cm_id_priv->lock); - switch (cm_id_priv->id.state) { - case IB_CM_REQ_SENT: - case IB_CM_MRA_REQ_RCVD: - break; - default: - spin_unlock_irq(&cm_id_priv->lock); - ret = -EINVAL; - goto error; - } - - cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; - cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; - cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); - - spin_lock(&cm.lock); - /* Check for duplicate REP. */ - if (cm_insert_remote_id(cm_id_priv->timewait_info)) { - spin_unlock(&cm.lock); - spin_unlock_irq(&cm_id_priv->lock); - ret = -EINVAL; - goto error; - } - /* Check for a stale connection. 
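- * If the remote QPN is already in remote_qp_table, an earlier
- * connection to the same QP still exists (e.g. in timewait), so
- * this REP is answered with a stale-connection REJ below.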
*/ - if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { - rb_erase(&cm_id_priv->timewait_info->remote_id_node, - &cm.remote_id_table); - cm_id_priv->timewait_info->inserted_remote_id = 0; - spin_unlock(&cm.lock); - spin_unlock_irq(&cm_id_priv->lock); - cm_issue_rej(work->port, work->mad_recv_wc, - IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, - NULL, 0); - ret = -EINVAL; - goto error; - } - spin_unlock(&cm.lock); - - cm_id_priv->id.state = IB_CM_REP_RCVD; - cm_id_priv->id.remote_id = rep_msg->local_comm_id; - cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); - cm_id_priv->initiator_depth = rep_msg->resp_resources; - cm_id_priv->responder_resources = rep_msg->initiator_depth; - cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); - cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); - cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); - cm_id_priv->av.timeout = - cm_ack_timeout(cm_id_priv->target_ack_delay, - cm_id_priv->av.timeout - 1); - cm_id_priv->alt_av.timeout = - cm_ack_timeout(cm_id_priv->target_ack_delay, - cm_id_priv->alt_av.timeout - 1); - - /* todo: handle peer_to_peer */ - - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); - return 0; - -error: - cm_deref_id(cm_id_priv); - return ret; -} - -static int cm_establish_handler(struct cm_work *work) -{ - struct cm_id_private *cm_id_priv; - int ret; - - /* See comment in cm_establish about lookup. */ - cm_id_priv = cm_acquire_id(work->local_id, work->remote_id); - if (!cm_id_priv) - return -EINVAL; - - spin_lock_irq(&cm_id_priv->lock); - if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { - spin_unlock_irq(&cm_id_priv->lock); - goto out; - } - - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); - return 0; -out: - cm_deref_id(cm_id_priv); - return -EINVAL; -} - -static int cm_rtu_handler(struct cm_work *work) -{ - struct cm_id_private *cm_id_priv; - struct cm_rtu_msg *rtu_msg; - int ret; - - rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id, - rtu_msg->local_comm_id); - if (!cm_id_priv) - return -EINVAL; - - work->cm_event.private_data = &rtu_msg->private_data; - - spin_lock_irq(&cm_id_priv->lock); - if (cm_id_priv->id.state != IB_CM_REP_SENT && - cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { - spin_unlock_irq(&cm_id_priv->lock); - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
- counter[CM_RTU_COUNTER]); - goto out; - } - cm_id_priv->id.state = IB_CM_ESTABLISHED; - - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); - return 0; -out: - cm_deref_id(cm_id_priv); - return -EINVAL; -} - -static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, - struct cm_id_private *cm_id_priv, - const void *private_data, - u8 private_data_len) -{ - cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ)); - dreq_msg->local_comm_id = cm_id_priv->id.local_id; - dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); - - if (private_data && private_data_len) - memcpy(dreq_msg->private_data, private_data, private_data_len); -} - -int ib_send_cm_dreq(struct ib_cm_id *cm_id, - const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - int ret; - - if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_ESTABLISHED) { - ret = -EINVAL; - goto out; - } - - if (cm_id->lap_state == IB_CM_LAP_SENT || - cm_id->lap_state == IB_CM_MRA_LAP_RCVD) - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) { - cm_enter_timewait(cm_id_priv); - goto out; - } - - cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, - private_data, private_data_len); - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; - - ret = ib_post_send_mad(msg, NULL); - if (ret) { - cm_enter_timewait(cm_id_priv); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } - - cm_id->state = IB_CM_DREQ_SENT; - cm_id_priv->msg = msg; -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_dreq); - -static void cm_format_drep(struct cm_drep_msg *drep_msg, - struct cm_id_private *cm_id_priv, - const void *private_data, - u8 private_data_len) -{ - cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid); - drep_msg->local_comm_id = cm_id_priv->id.local_id; - drep_msg->remote_comm_id = cm_id_priv->id.remote_id; - - if (private_data && private_data_len) - memcpy(drep_msg->private_data, private_data, private_data_len); -} - -int ib_send_cm_drep(struct ib_cm_id *cm_id, - const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - void *data; - int ret; - - if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE) - return -EINVAL; - - data = cm_copy_private_data(private_data, private_data_len); - if (IS_ERR(data)) - return PTR_ERR(data); - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_DREQ_RCVD) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - kfree(data); - return -EINVAL; - } - - cm_set_private_data(cm_id_priv, data, private_data_len); - cm_enter_timewait(cm_id_priv); - - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - 
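/* The id already entered timewait above; the DREP itself is best effort,
 - * as the peer retransmits its DREQ if this reply is lost. */
 -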
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, - private_data, private_data_len); - - ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } - -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_drep); - -static int cm_issue_drep(struct cm_port *port, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct ib_mad_send_buf *msg = NULL; - struct cm_dreq_msg *dreq_msg; - struct cm_drep_msg *drep_msg; - int ret; - - ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); - if (ret) - return ret; - - dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad; - drep_msg = (struct cm_drep_msg *) msg->mad; - - cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid); - drep_msg->remote_comm_id = dreq_msg->local_comm_id; - drep_msg->local_comm_id = dreq_msg->remote_comm_id; - - ret = ib_post_send_mad(msg, NULL); - if (ret) - cm_free_msg(msg); - - return ret; -} - -static int cm_dreq_handler(struct cm_work *work) -{ - struct cm_id_private *cm_id_priv; - struct cm_dreq_msg *dreq_msg; - struct ib_mad_send_buf *msg = NULL; - int ret; - - dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, - dreq_msg->local_comm_id); - if (!cm_id_priv) { - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_DREQ_COUNTER]); - cm_issue_drep(work->port, work->mad_recv_wc); - return -EINVAL; - } - - work->cm_event.private_data = &dreq_msg->private_data; - - spin_lock_irq(&cm_id_priv->lock); - if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg)) - goto unlock; - - switch (cm_id_priv->id.state) { - case IB_CM_REP_SENT: - case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - break; - case IB_CM_ESTABLISHED: - if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || - cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - break; - case IB_CM_MRA_REP_RCVD: - break; - case IB_CM_TIMEWAIT: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_DREQ_COUNTER]); - if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) - goto unlock; - - cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, - cm_id_priv->private_data, - cm_id_priv->private_data_len); - spin_unlock_irq(&cm_id_priv->lock); - - if (ib_post_send_mad(msg, NULL)) - cm_free_msg(msg); - goto deref; - case IB_CM_DREQ_RCVD: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
- counter[CM_DREQ_COUNTER]); - goto unlock; - default: - goto unlock; - } - cm_id_priv->id.state = IB_CM_DREQ_RCVD; - cm_id_priv->tid = dreq_msg->hdr.tid; - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); - return 0; - -unlock: spin_unlock_irq(&cm_id_priv->lock); -deref: cm_deref_id(cm_id_priv); - return -EINVAL; -} - -static int cm_drep_handler(struct cm_work *work) -{ - struct cm_id_private *cm_id_priv; - struct cm_drep_msg *drep_msg; - int ret; - - drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id, - drep_msg->local_comm_id); - if (!cm_id_priv) - return -EINVAL; - - work->cm_event.private_data = &drep_msg->private_data; - - spin_lock_irq(&cm_id_priv->lock); - if (cm_id_priv->id.state != IB_CM_DREQ_SENT && - cm_id_priv->id.state != IB_CM_DREQ_RCVD) { - spin_unlock_irq(&cm_id_priv->lock); - goto out; - } - cm_enter_timewait(cm_id_priv); - - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); - return 0; -out: - cm_deref_id(cm_id_priv); - return -EINVAL; -} - -int ib_send_cm_rej(struct ib_cm_id *cm_id, - enum ib_cm_rej_reason reason, - void *ari, - u8 ari_length, - const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - int ret; - - if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) || - (ari && ari_length > IB_CM_REJ_ARI_LENGTH)) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch (cm_id->state) { - case IB_CM_REQ_SENT: - case IB_CM_MRA_REQ_RCVD: - case IB_CM_REQ_RCVD: - case IB_CM_MRA_REQ_SENT: - case IB_CM_REP_RCVD: - case IB_CM_MRA_REP_SENT: - ret = cm_alloc_msg(cm_id_priv, &msg); - if (!ret) - cm_format_rej((struct cm_rej_msg *) msg->mad, - cm_id_priv, reason, ari, ari_length, - private_data, private_data_len); - - cm_reset_to_idle(cm_id_priv); - break; - case IB_CM_REP_SENT: - case IB_CM_MRA_REP_RCVD: - ret = cm_alloc_msg(cm_id_priv, &msg); - if (!ret) - cm_format_rej((struct cm_rej_msg *) msg->mad, - cm_id_priv, reason, ari, ari_length, - private_data, private_data_len); - - cm_enter_timewait(cm_id_priv); - break; - default: - ret = -EINVAL; - goto out; - } - - if (ret) - goto out; - - ret = ib_post_send_mad(msg, NULL); - if (ret) - cm_free_msg(msg); - -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_rej); - -static void cm_format_rej_event(struct cm_work *work) -{ - struct cm_rej_msg *rej_msg; - struct ib_cm_rej_event_param *param; - - rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; - param = &work->cm_event.param.rej_rcvd; - param->ari = rej_msg->ari; - param->ari_length = cm_rej_get_reject_info_len(rej_msg); - param->reason = __be16_to_cpu(rej_msg->reason); - work->cm_event.private_data = &rej_msg->private_data; -} - -static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) -{ - struct cm_timewait_info *timewait_info; - struct cm_id_private *cm_id_priv; - __be32 remote_id; - - remote_id = 
rej_msg->local_comm_id; - - if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) { - spin_lock_irq(&cm.lock); - timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari), - remote_id); - if (!timewait_info) { - spin_unlock_irq(&cm.lock); - return NULL; - } - cm_id_priv = idr_find(&cm.local_id_table, (__force int) - (timewait_info->work.local_id ^ - cm.random_id_operand)); - if (cm_id_priv) { - if (cm_id_priv->id.remote_id == remote_id) - atomic_inc(&cm_id_priv->refcount); - else - cm_id_priv = NULL; - } - spin_unlock_irq(&cm.lock); - } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ) - cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0); - else - cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id); - - return cm_id_priv; -} - -static int cm_rej_handler(struct cm_work *work) -{ - struct cm_id_private *cm_id_priv; - struct cm_rej_msg *rej_msg; - int ret; - - rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_rejected_id(rej_msg); - if (!cm_id_priv) - return -EINVAL; - - cm_format_rej_event(work); - - spin_lock_irq(&cm_id_priv->lock); - switch (cm_id_priv->id.state) { - case IB_CM_REQ_SENT: - case IB_CM_MRA_REQ_RCVD: - case IB_CM_REP_SENT: - case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - /* fall through */ - case IB_CM_REQ_RCVD: - case IB_CM_MRA_REQ_SENT: - if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN) - cm_enter_timewait(cm_id_priv); - else - cm_reset_to_idle(cm_id_priv); - break; - case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - /* fall through */ - case IB_CM_REP_RCVD: - case IB_CM_MRA_REP_SENT: - cm_enter_timewait(cm_id_priv); - break; - case IB_CM_ESTABLISHED: - if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT || - cm_id_priv->id.lap_state == IB_CM_LAP_SENT) { - if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT) - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg); - cm_enter_timewait(cm_id_priv); - break; - } - /* fall through */ - default: - spin_unlock_irq(&cm_id_priv->lock); - ret = -EINVAL; - goto out; - } - - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); - return 0; -out: - cm_deref_id(cm_id_priv); - return -EINVAL; -} - -int ib_send_cm_mra(struct ib_cm_id *cm_id, - u8 service_timeout, - const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - enum ib_cm_state cm_state; - enum ib_cm_lap_state lap_state; - enum cm_msg_response msg_response; - void *data; - unsigned long flags; - int ret; - - if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE) - return -EINVAL; - - data = cm_copy_private_data(private_data, private_data_len); - if (IS_ERR(data)) - return PTR_ERR(data); - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch(cm_id_priv->id.state) { - case IB_CM_REQ_RCVD: - cm_state = IB_CM_MRA_REQ_SENT; - lap_state = cm_id->lap_state; - msg_response = CM_MSG_RESPONSE_REQ; - break; - case IB_CM_REP_RCVD: - cm_state = IB_CM_MRA_REP_SENT; - lap_state = cm_id->lap_state; - msg_response = CM_MSG_RESPONSE_REP; - break; - case IB_CM_ESTABLISHED: - if (cm_id->lap_state == IB_CM_LAP_RCVD) { - cm_state = cm_id->state; - lap_state = IB_CM_MRA_LAP_SENT; - msg_response = 
CM_MSG_RESPONSE_OTHER; - break; - } - default: - ret = -EINVAL; - goto error1; - } - - if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) { - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto error1; - - cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, - msg_response, service_timeout, - private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); - if (ret) - goto error2; - } - - cm_id->state = cm_state; - cm_id->lap_state = lap_state; - cm_id_priv->service_timeout = service_timeout; - cm_set_private_data(cm_id_priv, data, private_data_len); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return 0; - -error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - kfree(data); - return ret; - -error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - kfree(data); - cm_free_msg(msg); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_mra); - -static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg) -{ - switch (cm_mra_get_msg_mraed(mra_msg)) { - case CM_MSG_RESPONSE_REQ: - return cm_acquire_id(mra_msg->remote_comm_id, 0); - case CM_MSG_RESPONSE_REP: - case CM_MSG_RESPONSE_OTHER: - return cm_acquire_id(mra_msg->remote_comm_id, - mra_msg->local_comm_id); - default: - return NULL; - } -} - -static int cm_mra_handler(struct cm_work *work) -{ - struct cm_id_private *cm_id_priv; - struct cm_mra_msg *mra_msg; - int timeout, ret; - - mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_mraed_id(mra_msg); - if (!cm_id_priv) - return -EINVAL; - - work->cm_event.private_data = &mra_msg->private_data; - work->cm_event.param.mra_rcvd.service_timeout = - cm_mra_get_service_timeout(mra_msg); - timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + - cm_convert_to_ms(cm_id_priv->av.timeout); - - spin_lock_irq(&cm_id_priv->lock); - switch (cm_id_priv->id.state) { - case IB_CM_REQ_SENT: - if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || - ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) - goto out; - cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; - break; - case IB_CM_REP_SENT: - if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || - ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) - goto out; - cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; - break; - case IB_CM_ESTABLISHED: - if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || - cm_id_priv->id.lap_state != IB_CM_LAP_SENT || - ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) { - if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) - atomic_long_inc(&work->port-> - counter_group[CM_RECV_DUPLICATES]. - counter[CM_MRA_COUNTER]); - goto out; - } - cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; - break; - case IB_CM_MRA_REQ_RCVD: - case IB_CM_MRA_REP_RCVD: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
- counter[CM_MRA_COUNTER]); - /* fall through */ - default: - goto out; - } - - cm_id_priv->msg->context[1] = (void *) (unsigned long) - cm_id_priv->id.state; - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); - return 0; -out: - spin_unlock_irq(&cm_id_priv->lock); - cm_deref_id(cm_id_priv); - return -EINVAL; -} - -static void cm_format_lap(struct cm_lap_msg *lap_msg, - struct cm_id_private *cm_id_priv, - struct ib_sa_path_rec *alternate_path, - const void *private_data, - u8 private_data_len) -{ - cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); - lap_msg->local_comm_id = cm_id_priv->id.local_id; - lap_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); - /* todo: need remote CM response timeout */ - cm_lap_set_remote_resp_timeout(lap_msg, 0x1F); - lap_msg->alt_local_lid = alternate_path->slid; - lap_msg->alt_remote_lid = alternate_path->dlid; - lap_msg->alt_local_gid = alternate_path->sgid; - lap_msg->alt_remote_gid = alternate_path->dgid; - cm_lap_set_flow_label(lap_msg, alternate_path->flow_label); - cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class); - lap_msg->alt_hop_limit = alternate_path->hop_limit; - cm_lap_set_packet_rate(lap_msg, alternate_path->rate); - cm_lap_set_sl(lap_msg, alternate_path->sl); - cm_lap_set_subnet_local(lap_msg, 1); /* local only... */ - cm_lap_set_local_ack_timeout(lap_msg, - cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, - alternate_path->packet_life_time)); - - if (private_data && private_data_len) - memcpy(lap_msg->private_data, private_data, private_data_len); -} - -int ib_send_cm_lap(struct ib_cm_id *cm_id, - struct ib_sa_path_rec *alternate_path, - const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - int ret; - - if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_ESTABLISHED || - (cm_id->lap_state != IB_CM_LAP_UNINIT && - cm_id->lap_state != IB_CM_LAP_IDLE)) { - ret = -EINVAL; - goto out; - } - - ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, - cm_id_priv); - if (ret) - goto out; - cm_id_priv->alt_av.timeout = - cm_ack_timeout(cm_id_priv->target_ack_delay, - cm_id_priv->alt_av.timeout - 1); - - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, - alternate_path, private_data, private_data_len); - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; - - ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } - - cm_id->lap_state = IB_CM_LAP_SENT; - cm_id_priv->msg = msg; - -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_lap); - -static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, - struct ib_sa_path_rec *path, - struct cm_lap_msg *lap_msg) -{ - memset(path, 0, sizeof *path); - path->dgid = lap_msg->alt_local_gid; - path->sgid = lap_msg->alt_remote_gid; - path->dlid = lap_msg->alt_local_lid; - 
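/* Note the swap: the LAP carries the sender's view of the alternate path,
 - * so its local fields become this side's destination and vice versa. */
 -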
path->slid = lap_msg->alt_remote_lid; - path->flow_label = cm_lap_get_flow_label(lap_msg); - path->hop_limit = lap_msg->alt_hop_limit; - path->traffic_class = cm_lap_get_traffic_class(lap_msg); - path->reversible = 1; - path->pkey = cm_id_priv->pkey; - path->sl = cm_lap_get_sl(lap_msg); - path->mtu_selector = IB_SA_EQ; - path->mtu = cm_id_priv->path_mtu; - path->rate_selector = IB_SA_EQ; - path->rate = cm_lap_get_packet_rate(lap_msg); - path->packet_life_time_selector = IB_SA_EQ; - path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg); - path->packet_life_time -= (path->packet_life_time > 0); -} - -static int cm_lap_handler(struct cm_work *work) -{ - struct cm_id_private *cm_id_priv; - struct cm_lap_msg *lap_msg; - struct ib_cm_lap_event_param *param; - struct ib_mad_send_buf *msg = NULL; - int ret; - - /* todo: verify LAP request and send reject APR if invalid. */ - lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id, - lap_msg->local_comm_id); - if (!cm_id_priv) - return -EINVAL; - - param = &work->cm_event.param.lap_rcvd; - param->alternate_path = &work->path[0]; - cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); - work->cm_event.private_data = &lap_msg->private_data; - - spin_lock_irq(&cm_id_priv->lock); - if (cm_id_priv->id.state != IB_CM_ESTABLISHED) - goto unlock; - - switch (cm_id_priv->id.lap_state) { - case IB_CM_LAP_UNINIT: - case IB_CM_LAP_IDLE: - break; - case IB_CM_MRA_LAP_SENT: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_LAP_COUNTER]); - if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) - goto unlock; - - cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, - CM_MSG_RESPONSE_OTHER, - cm_id_priv->service_timeout, - cm_id_priv->private_data, - cm_id_priv->private_data_len); - spin_unlock_irq(&cm_id_priv->lock); - - if (ib_post_send_mad(msg, NULL)) - cm_free_msg(msg); - goto deref; - case IB_CM_LAP_RCVD: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
- counter[CM_LAP_COUNTER]); - goto unlock; - default: - goto unlock; - } - - cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; - cm_id_priv->tid = lap_msg->hdr.tid; - cm_init_av_for_response(work->port, work->mad_recv_wc->wc, - work->mad_recv_wc->recv_buf.grh, - &cm_id_priv->av); - cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, - cm_id_priv); - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); - return 0; - -unlock: spin_unlock_irq(&cm_id_priv->lock); -deref: cm_deref_id(cm_id_priv); - return -EINVAL; -} - -static void cm_format_apr(struct cm_apr_msg *apr_msg, - struct cm_id_private *cm_id_priv, - enum ib_cm_apr_status status, - void *info, - u8 info_length, - const void *private_data, - u8 private_data_len) -{ - cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid); - apr_msg->local_comm_id = cm_id_priv->id.local_id; - apr_msg->remote_comm_id = cm_id_priv->id.remote_id; - apr_msg->ap_status = (u8) status; - - if (info && info_length) { - apr_msg->info_length = info_length; - memcpy(apr_msg->info, info, info_length); - } - - if (private_data && private_data_len) - memcpy(apr_msg->private_data, private_data, private_data_len); -} - -int ib_send_cm_apr(struct ib_cm_id *cm_id, - enum ib_cm_apr_status status, - void *info, - u8 info_length, - const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - int ret; - - if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) || - (info && info_length > IB_CM_APR_INFO_LENGTH)) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_ESTABLISHED || - (cm_id->lap_state != IB_CM_LAP_RCVD && - cm_id->lap_state != IB_CM_MRA_LAP_SENT)) { - ret = -EINVAL; - goto out; - } - - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, - info, info_length, private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } - - cm_id->lap_state = IB_CM_LAP_IDLE; -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_apr); - -static int cm_apr_handler(struct cm_work *work) -{ - struct cm_id_private *cm_id_priv; - struct cm_apr_msg *apr_msg; - int ret; - - apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id, - apr_msg->local_comm_id); - if (!cm_id_priv) - return -EINVAL; /* Unmatched reply. 
*/ - - work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status; - work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info; - work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length; - work->cm_event.private_data = &apr_msg->private_data; - - spin_lock_irq(&cm_id_priv->lock); - if (cm_id_priv->id.state != IB_CM_ESTABLISHED || - (cm_id_priv->id.lap_state != IB_CM_LAP_SENT && - cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) { - spin_unlock_irq(&cm_id_priv->lock); - goto out; - } - cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - cm_id_priv->msg = NULL; - - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); - return 0; -out: - cm_deref_id(cm_id_priv); - return -EINVAL; -} - -static int cm_timewait_handler(struct cm_work *work) -{ - struct cm_timewait_info *timewait_info; - struct cm_id_private *cm_id_priv; - int ret; - - timewait_info = (struct cm_timewait_info *)work; - spin_lock_irq(&cm.lock); - list_del(&timewait_info->list); - spin_unlock_irq(&cm.lock); - - cm_id_priv = cm_acquire_id(timewait_info->work.local_id, - timewait_info->work.remote_id); - if (!cm_id_priv) - return -EINVAL; - - spin_lock_irq(&cm_id_priv->lock); - if (cm_id_priv->id.state != IB_CM_TIMEWAIT || - cm_id_priv->remote_qpn != timewait_info->remote_qpn) { - spin_unlock_irq(&cm_id_priv->lock); - goto out; - } - cm_id_priv->id.state = IB_CM_IDLE; - ret = atomic_inc_and_test(&cm_id_priv->work_count); - if (!ret) - list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irq(&cm_id_priv->lock); - - if (ret) - cm_process_work(cm_id_priv, work); - else - cm_deref_id(cm_id_priv); - return 0; -out: - cm_deref_id(cm_id_priv); - return -EINVAL; -} - -static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, - struct cm_id_private *cm_id_priv, - struct ib_cm_sidr_req_param *param) -{ - cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, - cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); - sidr_req_msg->request_id = cm_id_priv->id.local_id; - sidr_req_msg->pkey = param->path->pkey; - sidr_req_msg->service_id = param->service_id; - - if (param->private_data && param->private_data_len) - memcpy(sidr_req_msg->private_data, param->private_data, - param->private_data_len); -} - -int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, - struct ib_cm_sidr_req_param *param) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - int ret; - - if (!param->path || (param->private_data && - param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE)) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); - if (ret) - goto out; - - cm_id->service_id = param->service_id; - cm_id->service_mask = ~cpu_to_be64(0); - cm_id_priv->timeout_ms = param->timeout_ms; - cm_id_priv->max_cm_retries = param->max_cm_retries; - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, - param); - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; - - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state == IB_CM_IDLE) - ret = ib_post_send_mad(msg, NULL); - else - ret = -EINVAL; - - if (ret) { - 
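/* Either the id was no longer idle or the MAD could not be posted;
 - * drop the lock before freeing the message. */
 -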
spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - goto out; - } - cm_id->state = IB_CM_SIDR_REQ_SENT; - cm_id_priv->msg = msg; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); -out: - return ret; -} -EXPORT_SYMBOL(ib_send_cm_sidr_req); - -static void cm_format_sidr_req_event(struct cm_work *work, - struct ib_cm_id *listen_id) -{ - struct cm_sidr_req_msg *sidr_req_msg; - struct ib_cm_sidr_req_event_param *param; - - sidr_req_msg = (struct cm_sidr_req_msg *) - work->mad_recv_wc->recv_buf.mad; - param = &work->cm_event.param.sidr_req_rcvd; - param->pkey = __be16_to_cpu(sidr_req_msg->pkey); - param->listen_id = listen_id; - param->service_id = sidr_req_msg->service_id; - param->bth_pkey = cm_get_bth_pkey(work); - param->port = work->port->port_num; - work->cm_event.private_data = &sidr_req_msg->private_data; -} - -static int cm_sidr_req_handler(struct cm_work *work) -{ - struct ib_cm_id *cm_id; - struct cm_id_private *cm_id_priv, *cur_cm_id_priv; - struct cm_sidr_req_msg *sidr_req_msg; - struct ib_wc *wc; - - cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); - if (IS_ERR(cm_id)) - return PTR_ERR(cm_id); - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - - /* Record SGID/SLID and request ID for lookup. */ - sidr_req_msg = (struct cm_sidr_req_msg *) - work->mad_recv_wc->recv_buf.mad; - wc = work->mad_recv_wc->wc; - cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid); - cm_id_priv->av.dgid.global.interface_id = 0; - cm_init_av_for_response(work->port, work->mad_recv_wc->wc, - work->mad_recv_wc->recv_buf.grh, - &cm_id_priv->av); - cm_id_priv->id.remote_id = sidr_req_msg->request_id; - cm_id_priv->tid = sidr_req_msg->hdr.tid; - atomic_inc(&cm_id_priv->work_count); - - spin_lock_irq(&cm.lock); - cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); - if (cur_cm_id_priv) { - spin_unlock_irq(&cm.lock); - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_SIDR_REQ_COUNTER]); - goto out; /* Duplicate message. */ - } - cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; - cur_cm_id_priv = cm_find_listen(cm_id->device, - sidr_req_msg->service_id); - if (!cur_cm_id_priv) { - spin_unlock_irq(&cm.lock); - cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); - goto out; /* No match. 
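- * No listener is registered for this service ID; the request was
- * rejected above with IB_SIDR_UNSUPPORTED.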
*/ - } - atomic_inc(&cur_cm_id_priv->refcount); - atomic_inc(&cm_id_priv->refcount); - spin_unlock_irq(&cm.lock); - - cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; - cm_id_priv->id.context = cur_cm_id_priv->id.context; - cm_id_priv->id.service_id = sidr_req_msg->service_id; - cm_id_priv->id.service_mask = ~cpu_to_be64(0); - - cm_format_sidr_req_event(work, &cur_cm_id_priv->id); - cm_process_work(cm_id_priv, work); - cm_deref_id(cur_cm_id_priv); - return 0; -out: - ib_destroy_cm_id(&cm_id_priv->id); - return -EINVAL; -} - -static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, - struct cm_id_private *cm_id_priv, - struct ib_cm_sidr_rep_param *param) -{ - cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, - cm_id_priv->tid); - sidr_rep_msg->request_id = cm_id_priv->id.remote_id; - sidr_rep_msg->status = param->status; - cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num)); - sidr_rep_msg->service_id = cm_id_priv->id.service_id; - sidr_rep_msg->qkey = cpu_to_be32(param->qkey); - - if (param->info && param->info_length) - memcpy(sidr_rep_msg->info, param->info, param->info_length); - - if (param->private_data && param->private_data_len) - memcpy(sidr_rep_msg->private_data, param->private_data, - param->private_data_len); -} - -int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, - struct ib_cm_sidr_rep_param *param) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - int ret; - - if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) || - (param->private_data && - param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE)) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_SIDR_REQ_RCVD) { - ret = -EINVAL; - goto error; - } - - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto error; - - cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, - param); - ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } - cm_id->state = IB_CM_IDLE; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - - spin_lock_irqsave(&cm.lock, flags); - if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) { - rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); - RB_CLEAR_NODE(&cm_id_priv->sidr_id_node); - } - spin_unlock_irqrestore(&cm.lock, flags); - return 0; - -error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_sidr_rep); - -static void cm_format_sidr_rep_event(struct cm_work *work) -{ - struct cm_sidr_rep_msg *sidr_rep_msg; - struct ib_cm_sidr_rep_event_param *param; - - sidr_rep_msg = (struct cm_sidr_rep_msg *) - work->mad_recv_wc->recv_buf.mad; - param = &work->cm_event.param.sidr_rep_rcvd; - param->status = sidr_rep_msg->status; - param->qkey = be32_to_cpu(sidr_rep_msg->qkey); - param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); - param->info = &sidr_rep_msg->info; - param->info_len = sidr_rep_msg->info_length; - work->cm_event.private_data = &sidr_rep_msg->private_data; -} - -static int cm_sidr_rep_handler(struct cm_work *work) -{ - struct cm_sidr_rep_msg *sidr_rep_msg; - struct cm_id_private *cm_id_priv; - - sidr_rep_msg = (struct cm_sidr_rep_msg *) - work->mad_recv_wc->recv_buf.mad; - cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0); - if (!cm_id_priv) - return -EINVAL; /* Unmatched reply. 
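- * The SIDR request may already have timed out or been canceled before
- * this reply arrived.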
*/ - - spin_lock_irq(&cm_id_priv->lock); - if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) { - spin_unlock_irq(&cm_id_priv->lock); - goto out; - } - cm_id_priv->id.state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - spin_unlock_irq(&cm_id_priv->lock); - - cm_format_sidr_rep_event(work); - cm_process_work(cm_id_priv, work); - return 0; -out: - cm_deref_id(cm_id_priv); - return -EINVAL; -} - -static void cm_process_send_error(struct ib_mad_send_buf *msg, - enum ib_wc_status wc_status) -{ - struct cm_id_private *cm_id_priv; - struct ib_cm_event cm_event; - enum ib_cm_state state; - int ret; - - memset(&cm_event, 0, sizeof cm_event); - cm_id_priv = msg->context[0]; - - /* Discard old sends or ones without a response. */ - spin_lock_irq(&cm_id_priv->lock); - state = (enum ib_cm_state) (unsigned long) msg->context[1]; - if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) - goto discard; - - switch (state) { - case IB_CM_REQ_SENT: - case IB_CM_MRA_REQ_RCVD: - cm_reset_to_idle(cm_id_priv); - cm_event.event = IB_CM_REQ_ERROR; - break; - case IB_CM_REP_SENT: - case IB_CM_MRA_REP_RCVD: - cm_reset_to_idle(cm_id_priv); - cm_event.event = IB_CM_REP_ERROR; - break; - case IB_CM_DREQ_SENT: - cm_enter_timewait(cm_id_priv); - cm_event.event = IB_CM_DREQ_ERROR; - break; - case IB_CM_SIDR_REQ_SENT: - cm_id_priv->id.state = IB_CM_IDLE; - cm_event.event = IB_CM_SIDR_REQ_ERROR; - break; - default: - goto discard; - } - spin_unlock_irq(&cm_id_priv->lock); - cm_event.param.send_status = wc_status; - - /* No other events can occur on the cm_id at this point. */ - ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event); - cm_free_msg(msg); - if (ret) - ib_destroy_cm_id(&cm_id_priv->id); - return; -discard: - spin_unlock_irq(&cm_id_priv->lock); - cm_free_msg(msg); -} - -static void cm_send_handler(struct ib_mad_agent *mad_agent, - struct ib_mad_send_wc *mad_send_wc) -{ - struct ib_mad_send_buf *msg = mad_send_wc->send_buf; - struct cm_port *port; - u16 attr_index; - - port = mad_agent->context; - attr_index = be16_to_cpu(((struct ib_mad_hdr *) - msg->mad)->attr_id) - CM_ATTR_ID_OFFSET; - - /* - * If the send was in response to a received message (context[0] is not - * set to a cm_id), and is not a REJ, then it is a send that was - * manually retried. - */ - if (!msg->context[0] && (attr_index != CM_REJ_COUNTER)) - msg->retries = 1; - - atomic_long_add(1 + msg->retries, - &port->counter_group[CM_XMIT].counter[attr_index]); - if (msg->retries) - atomic_long_add(msg->retries, - &port->counter_group[CM_XMIT_RETRIES]. 
- counter[attr_index]); - - switch (mad_send_wc->status) { - case IB_WC_SUCCESS: - case IB_WC_WR_FLUSH_ERR: - cm_free_msg(msg); - break; - default: - if (msg->context[0] && msg->context[1]) - cm_process_send_error(msg, mad_send_wc->status); - else - cm_free_msg(msg); - break; - } -} - -static void cm_work_handler(struct work_struct *_work) -{ - struct cm_work *work = container_of(_work, struct cm_work, work.work); - int ret; - - switch (work->cm_event.event) { - case IB_CM_REQ_RECEIVED: - ret = cm_req_handler(work); - break; - case IB_CM_MRA_RECEIVED: - ret = cm_mra_handler(work); - break; - case IB_CM_REJ_RECEIVED: - ret = cm_rej_handler(work); - break; - case IB_CM_REP_RECEIVED: - ret = cm_rep_handler(work); - break; - case IB_CM_RTU_RECEIVED: - ret = cm_rtu_handler(work); - break; - case IB_CM_USER_ESTABLISHED: - ret = cm_establish_handler(work); - break; - case IB_CM_DREQ_RECEIVED: - ret = cm_dreq_handler(work); - break; - case IB_CM_DREP_RECEIVED: - ret = cm_drep_handler(work); - break; - case IB_CM_SIDR_REQ_RECEIVED: - ret = cm_sidr_req_handler(work); - break; - case IB_CM_SIDR_REP_RECEIVED: - ret = cm_sidr_rep_handler(work); - break; - case IB_CM_LAP_RECEIVED: - ret = cm_lap_handler(work); - break; - case IB_CM_APR_RECEIVED: - ret = cm_apr_handler(work); - break; - case IB_CM_TIMEWAIT_EXIT: - ret = cm_timewait_handler(work); - break; - default: - ret = -EINVAL; - break; - } - if (ret) - cm_free_work(work); -} - -static int cm_establish(struct ib_cm_id *cm_id) -{ - struct cm_id_private *cm_id_priv; - struct cm_work *work; - unsigned long flags; - int ret = 0; - struct cm_device *cm_dev; - - cm_dev = ib_get_client_data(cm_id->device, &cm_client); - if (!cm_dev) - return -ENODEV; - - work = kmalloc(sizeof *work, GFP_ATOMIC); - if (!work) - return -ENOMEM; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch (cm_id->state) - { - case IB_CM_REP_SENT: - case IB_CM_MRA_REP_RCVD: - cm_id->state = IB_CM_ESTABLISHED; - break; - case IB_CM_ESTABLISHED: - ret = -EISCONN; - break; - default: - ret = -EINVAL; - break; - } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - - if (ret) { - kfree(work); - goto out; - } - - /* - * The CM worker thread may try to destroy the cm_id before it - * can execute this work item. To prevent potential deadlock, - * we need to find the cm_id once we're in the context of the - * worker thread, rather than holding a reference on it. 
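- * The work item therefore records only the local and remote IDs, and
- * cm_establish_handler re-acquires the cm_id from those when it runs.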
- */ - INIT_DELAYED_WORK(&work->work, cm_work_handler); - work->local_id = cm_id->local_id; - work->remote_id = cm_id->remote_id; - work->mad_recv_wc = NULL; - work->cm_event.event = IB_CM_USER_ESTABLISHED; - - /* Check if the device started its remove_one */ - spin_lock_irqsave(&cm.lock, flags); - if (!cm_dev->going_down) { - queue_delayed_work(cm.wq, &work->work, 0); - } else { - kfree(work); - ret = -ENODEV; - } - spin_unlock_irqrestore(&cm.lock, flags); - -out: - return ret; -} - -static int cm_migrate(struct ib_cm_id *cm_id) -{ - struct cm_id_private *cm_id_priv; - struct cm_av tmp_av; - unsigned long flags; - int tmp_send_port_not_ready; - int ret = 0; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state == IB_CM_ESTABLISHED && - (cm_id->lap_state == IB_CM_LAP_UNINIT || - cm_id->lap_state == IB_CM_LAP_IDLE)) { - cm_id->lap_state = IB_CM_LAP_IDLE; - /* Swap address vector */ - tmp_av = cm_id_priv->av; - cm_id_priv->av = cm_id_priv->alt_av; - cm_id_priv->alt_av = tmp_av; - /* Swap port send ready state */ - tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready; - cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready; - cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready; - } else - ret = -EINVAL; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - - return ret; -} - -int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event) -{ - int ret; - - switch (event) { - case IB_EVENT_COMM_EST: - ret = cm_establish(cm_id); - break; - case IB_EVENT_PATH_MIG: - ret = cm_migrate(cm_id); - break; - default: - ret = -EINVAL; - } - return ret; -} -EXPORT_SYMBOL(ib_cm_notify); - -static void cm_recv_handler(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, - struct ib_mad_recv_wc *mad_recv_wc) -{ - struct cm_port *port = mad_agent->context; - struct cm_work *work; - enum ib_cm_event_type event; - u16 attr_id; - int paths = 0; - int going_down = 0; - - switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { - case CM_REQ_ATTR_ID: - paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)-> - alt_local_lid != 0); - event = IB_CM_REQ_RECEIVED; - break; - case CM_MRA_ATTR_ID: - event = IB_CM_MRA_RECEIVED; - break; - case CM_REJ_ATTR_ID: - event = IB_CM_REJ_RECEIVED; - break; - case CM_REP_ATTR_ID: - event = IB_CM_REP_RECEIVED; - break; - case CM_RTU_ATTR_ID: - event = IB_CM_RTU_RECEIVED; - break; - case CM_DREQ_ATTR_ID: - event = IB_CM_DREQ_RECEIVED; - break; - case CM_DREP_ATTR_ID: - event = IB_CM_DREP_RECEIVED; - break; - case CM_SIDR_REQ_ATTR_ID: - event = IB_CM_SIDR_REQ_RECEIVED; - break; - case CM_SIDR_REP_ATTR_ID: - event = IB_CM_SIDR_REP_RECEIVED; - break; - case CM_LAP_ATTR_ID: - paths = 1; - event = IB_CM_LAP_RECEIVED; - break; - case CM_APR_ATTR_ID: - event = IB_CM_APR_RECEIVED; - break; - default: - ib_free_recv_mad(mad_recv_wc); - return; - } - - attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id); - atomic_long_inc(&port->counter_group[CM_RECV]. 
- counter[attr_id - CM_ATTR_ID_OFFSET]); - - work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, - GFP_KERNEL); - if (!work) { - ib_free_recv_mad(mad_recv_wc); - return; - } - - INIT_DELAYED_WORK(&work->work, cm_work_handler); - work->cm_event.event = event; - work->mad_recv_wc = mad_recv_wc; - work->port = port; - - /* Check if the device started its remove_one */ - spin_lock_irq(&cm.lock); - if (!port->cm_dev->going_down) - queue_delayed_work(cm.wq, &work->work, 0); - else - going_down = 1; - spin_unlock_irq(&cm.lock); - - if (going_down) { - kfree(work); - ib_free_recv_mad(mad_recv_wc); - } -} - -static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, - struct ib_qp_attr *qp_attr, - int *qp_attr_mask) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch (cm_id_priv->id.state) { - case IB_CM_REQ_SENT: - case IB_CM_MRA_REQ_RCVD: - case IB_CM_REQ_RCVD: - case IB_CM_MRA_REQ_SENT: - case IB_CM_REP_RCVD: - case IB_CM_MRA_REP_SENT: - case IB_CM_REP_SENT: - case IB_CM_MRA_REP_RCVD: - case IB_CM_ESTABLISHED: - *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | - IB_QP_PKEY_INDEX | IB_QP_PORT; - qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; - if (cm_id_priv->responder_resources) - qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_ATOMIC; - qp_attr->pkey_index = cm_id_priv->av.pkey_index; - qp_attr->port_num = cm_id_priv->av.port->port_num; - ret = 0; - break; - default: - ret = -EINVAL; - break; - } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} - -static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, - struct ib_qp_attr *qp_attr, - int *qp_attr_mask) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch (cm_id_priv->id.state) { - case IB_CM_REQ_RCVD: - case IB_CM_MRA_REQ_SENT: - case IB_CM_REP_RCVD: - case IB_CM_MRA_REP_SENT: - case IB_CM_REP_SENT: - case IB_CM_MRA_REP_RCVD: - case IB_CM_ESTABLISHED: - *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | - IB_QP_DEST_QPN | IB_QP_RQ_PSN; - qp_attr->ah_attr = cm_id_priv->av.ah_attr; - qp_attr->path_mtu = cm_id_priv->path_mtu; - qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); - qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC || - cm_id_priv->qp_type == IB_QPT_XRC_TGT) { - *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | - IB_QP_MIN_RNR_TIMER; - qp_attr->max_dest_rd_atomic = - cm_id_priv->responder_resources; - qp_attr->min_rnr_timer = 0; - } - if (cm_id_priv->alt_av.ah_attr.dlid) { - *qp_attr_mask |= IB_QP_ALT_PATH; - qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; - qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; - qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; - qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; - } - ret = 0; - break; - default: - ret = -EINVAL; - break; - } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} - -static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, - struct ib_qp_attr *qp_attr, - int *qp_attr_mask) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch (cm_id_priv->id.state) { - /* Allow transition to RTS before sending REP */ - case IB_CM_REQ_RCVD: - case IB_CM_MRA_REQ_SENT: - - case IB_CM_REP_RCVD: - case IB_CM_MRA_REP_SENT: - case IB_CM_REP_SENT: - case IB_CM_MRA_REP_RCVD: - case IB_CM_ESTABLISHED: - if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) { - *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; - 
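/* RC and XRC initiator QPs also pick up retry counts and RD atomic depth
 - * below, then fall through to share the ack timeout with XRC targets. */
 -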
qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); - switch (cm_id_priv->qp_type) { - case IB_QPT_RC: - case IB_QPT_XRC_INI: - *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | - IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->retry_cnt = cm_id_priv->retry_count; - qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; - qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; - /* fall through */ - case IB_QPT_XRC_TGT: - *qp_attr_mask |= IB_QP_TIMEOUT; - qp_attr->timeout = cm_id_priv->av.timeout; - break; - default: - break; - } - if (cm_id_priv->alt_av.ah_attr.dlid) { - *qp_attr_mask |= IB_QP_PATH_MIG_STATE; - qp_attr->path_mig_state = IB_MIG_REARM; - } - } else { - *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; - qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; - qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; - qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; - qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; - qp_attr->path_mig_state = IB_MIG_REARM; - } - ret = 0; - break; - default: - ret = -EINVAL; - break; - } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} - -int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, - struct ib_qp_attr *qp_attr, - int *qp_attr_mask) -{ - struct cm_id_private *cm_id_priv; - int ret; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - switch (qp_attr->qp_state) { - case IB_QPS_INIT: - ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask); - break; - case IB_QPS_RTR: - ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask); - break; - case IB_QPS_RTS: - ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); - break; - default: - ret = -EINVAL; - break; - } - return ret; -} -EXPORT_SYMBOL(ib_cm_init_qp_attr); - -static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, - char *buf) -{ - struct cm_counter_group *group; - struct cm_counter_attribute *cm_attr; - - group = container_of(obj, struct cm_counter_group, obj); - cm_attr = container_of(attr, struct cm_counter_attribute, attr); - - return sprintf(buf, "%ld\n", - atomic_long_read(&group->counter[cm_attr->index])); -} - -static const struct sysfs_ops cm_counter_ops = { - .show = cm_show_counter -}; - -static struct kobj_type cm_counter_obj_type = { - .sysfs_ops = &cm_counter_ops, - .default_attrs = cm_counter_default_attrs -}; - -static void cm_release_port_obj(struct kobject *obj) -{ - struct cm_port *cm_port; - - cm_port = container_of(obj, struct cm_port, port_obj); - kfree(cm_port); -} - -static struct kobj_type cm_port_obj_type = { - .release = cm_release_port_obj -}; - -static char *cm_devnode(struct device *dev, umode_t *mode) -{ - if (mode) - *mode = 0666; - return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); -} - -struct class cm_class = { - .owner = THIS_MODULE, - .name = "infiniband_cm", - .devnode = cm_devnode, -}; -EXPORT_SYMBOL(cm_class); - -static int cm_create_port_fs(struct cm_port *port) -{ - int i, ret; - - ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, - &port->cm_dev->device->kobj, - "%d", port->port_num); - if (ret) { - kfree(port); - return ret; - } - - for (i = 0; i < CM_COUNTER_GROUPS; i++) { - ret = kobject_init_and_add(&port->counter_group[i].obj, - &cm_counter_obj_type, - &port->port_obj, - "%s", counter_group_names[i]); - if (ret) - goto error; - } - - return 0; - -error: - while (i--) - kobject_put(&port->counter_group[i].obj); - kobject_put(&port->port_obj); - return ret; - -} - -static void cm_remove_port_fs(struct cm_port *port) -{ - int i; - - for (i = 0; i < 
CM_COUNTER_GROUPS; i++) - kobject_put(&port->counter_group[i].obj); - - kobject_put(&port->port_obj); -} - -static void cm_add_one(struct ib_device *ib_device) -{ - struct cm_device *cm_dev; - struct cm_port *port; - struct ib_mad_reg_req reg_req = { - .mgmt_class = IB_MGMT_CLASS_CM, - .mgmt_class_version = IB_CM_CLASS_VERSION, - }; - struct ib_port_modify port_modify = { - .set_port_cap_mask = IB_PORT_CM_SUP - }; - unsigned long flags; - int ret; - int count = 0; - u8 i; - - cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) * - ib_device->phys_port_cnt, GFP_KERNEL); - if (!cm_dev) - return; - - cm_dev->ib_device = ib_device; - cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; - cm_dev->going_down = 0; - cm_dev->device = device_create(&cm_class, &ib_device->dev, - MKDEV(0, 0), NULL, - "%s", ib_device->name); - if (IS_ERR(cm_dev->device)) { - kfree(cm_dev); - return; - } - - set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); - for (i = 1; i <= ib_device->phys_port_cnt; i++) { - if (!rdma_cap_ib_cm(ib_device, i)) - continue; - - port = kzalloc(sizeof *port, GFP_KERNEL); - if (!port) - goto error1; - - cm_dev->port[i-1] = port; - port->cm_dev = cm_dev; - port->port_num = i; - - INIT_LIST_HEAD(&port->cm_priv_prim_list); - INIT_LIST_HEAD(&port->cm_priv_altr_list); - - ret = cm_create_port_fs(port); - if (ret) - goto error1; - - port->mad_agent = ib_register_mad_agent(ib_device, i, - IB_QPT_GSI, - &reg_req, - 0, - cm_send_handler, - cm_recv_handler, - port, - 0); - if (IS_ERR(port->mad_agent)) - goto error2; - - ret = ib_modify_port(ib_device, i, 0, &port_modify); - if (ret) - goto error3; - - count++; - } - - if (!count) - goto free; - - ib_set_client_data(ib_device, &cm_client, cm_dev); - - write_lock_irqsave(&cm.device_lock, flags); - list_add_tail(&cm_dev->list, &cm.device_list); - write_unlock_irqrestore(&cm.device_lock, flags); - return; - -error3: - ib_unregister_mad_agent(port->mad_agent); -error2: - cm_remove_port_fs(port); -error1: - port_modify.set_port_cap_mask = 0; - port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; - while (--i) { - if (!rdma_cap_ib_cm(ib_device, i)) - continue; - - port = cm_dev->port[i-1]; - ib_modify_port(ib_device, port->port_num, 0, &port_modify); - ib_unregister_mad_agent(port->mad_agent); - cm_remove_port_fs(port); - } -free: - device_unregister(cm_dev->device); - kfree(cm_dev); -} - -static void cm_remove_one(struct ib_device *ib_device, void *client_data) -{ - struct cm_device *cm_dev = client_data; - struct cm_port *port; - struct cm_id_private *cm_id_priv; - struct ib_mad_agent *cur_mad_agent; - struct ib_port_modify port_modify = { - .clr_port_cap_mask = IB_PORT_CM_SUP - }; - unsigned long flags; - int i; - - if (!cm_dev) - return; - - write_lock_irqsave(&cm.device_lock, flags); - list_del(&cm_dev->list); - write_unlock_irqrestore(&cm.device_lock, flags); - - spin_lock_irq(&cm.lock); - cm_dev->going_down = 1; - spin_unlock_irq(&cm.lock); - - for (i = 1; i <= ib_device->phys_port_cnt; i++) { - if (!rdma_cap_ib_cm(ib_device, i)) - continue; - - port = cm_dev->port[i-1]; - ib_modify_port(ib_device, port->port_num, 0, &port_modify); - /* Mark all the cm_ids as not valid */ - spin_lock_irq(&cm.lock); - list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list) - cm_id_priv->altr_send_port_not_ready = 1; - list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list) - cm_id_priv->prim_send_port_not_ready = 1; - spin_unlock_irq(&cm.lock); - /* - * We flush the queue here after going_down is set; this - * verifies that no new work will be queued in the recv handler, - * and after that we can call unregister_mad_agent - */ - flush_workqueue(cm.wq); - spin_lock_irq(&cm.state_lock); - cur_mad_agent = port->mad_agent; - port->mad_agent = NULL; - spin_unlock_irq(&cm.state_lock); - ib_unregister_mad_agent(cur_mad_agent); - cm_remove_port_fs(port); - } - - device_unregister(cm_dev->device); - kfree(cm_dev); -} - -static int __init ib_cm_init(void) -{ - int ret; - - memset(&cm, 0, sizeof cm); - INIT_LIST_HEAD(&cm.device_list); - rwlock_init(&cm.device_lock); - spin_lock_init(&cm.lock); - spin_lock_init(&cm.state_lock); - cm.listen_service_table = RB_ROOT; - cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); - cm.remote_id_table = RB_ROOT; - cm.remote_qp_table = RB_ROOT; - cm.remote_sidr_table = RB_ROOT; - idr_init(&cm.local_id_table); - get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); - INIT_LIST_HEAD(&cm.timewait_list); - - ret = class_register(&cm_class); - if (ret) { - ret = -ENOMEM; - goto error1; - } - - cm.wq = create_workqueue("ib_cm"); - if (!cm.wq) { - ret = -ENOMEM; - goto error2; - } - - ret = ib_register_client(&cm_client); - if (ret) - goto error3; - - return 0; -error3: - destroy_workqueue(cm.wq); -error2: - class_unregister(&cm_class); -error1: - idr_destroy(&cm.local_id_table); - return ret; -} - -static void __exit ib_cm_cleanup(void) -{ - struct cm_timewait_info *timewait_info, *tmp; - - spin_lock_irq(&cm.lock); - list_for_each_entry(timewait_info, &cm.timewait_list, list) - cancel_delayed_work(&timewait_info->work.work); - spin_unlock_irq(&cm.lock); - - ib_unregister_client(&cm_client); - destroy_workqueue(cm.wq); - - list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) { - list_del(&timewait_info->list); - kfree(timewait_info); - } - - class_unregister(&cm_class); - idr_destroy(&cm.local_id_table); -} - -module_init_order(ib_cm_init, SI_ORDER_SECOND); -module_exit_order(ib_cm_cleanup, SI_ORDER_FIRST); - Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/cm.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ucma.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ucma.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ucma.c (nonexistent) @@ -1,1755 +0,0 @@ -/* - * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -MODULE_AUTHOR("Sean Hefty"); -MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); -MODULE_LICENSE("Dual BSD/GPL"); - -static unsigned int max_backlog = 1024; - -struct ucma_file { - struct mutex mut; - struct file *filp; - struct list_head ctx_list; - struct list_head event_list; - wait_queue_head_t poll_wait; - struct workqueue_struct *close_wq; -}; - -struct ucma_context { - int id; - struct completion comp; - atomic_t ref; - int events_reported; - int backlog; - - struct ucma_file *file; - struct rdma_cm_id *cm_id; - u64 uid; - - struct list_head list; - struct list_head mc_list; - /* mark that the device is in the process of destroying its internal - * HW resources, protected by the global mut - */ - int closing; - /* sync between removal event and id destroy, protected by file mut */ - int destroying; - struct work_struct close_work; -}; - -struct ucma_multicast { - struct ucma_context *ctx; - int id; - int events_reported; - - u64 uid; - u8 join_state; - struct list_head list; - struct sockaddr_storage addr; -}; - -struct ucma_event { - struct ucma_context *ctx; - struct ucma_multicast *mc; - struct list_head list; - struct rdma_cm_id *cm_id; - struct rdma_ucm_event_resp resp; - struct work_struct close_work; -}; - -static DEFINE_MUTEX(mut); -static DEFINE_IDR(ctx_idr); -static DEFINE_IDR(multicast_idr); - -static inline struct ucma_context *_ucma_find_context(int id, - struct ucma_file *file) -{ - struct ucma_context *ctx; - - ctx = idr_find(&ctx_idr, id); - if (!ctx) - ctx = ERR_PTR(-ENOENT); - else if (ctx->file != file) - ctx = ERR_PTR(-EINVAL); - return ctx; -} - -static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) -{ - struct ucma_context *ctx; - - mutex_lock(&mut); - ctx = _ucma_find_context(id, file); - if (!IS_ERR(ctx)) { - if (ctx->closing) - ctx = ERR_PTR(-EIO); - else - atomic_inc(&ctx->ref); - } - mutex_unlock(&mut); - return ctx; -} - -static void ucma_put_ctx(struct ucma_context *ctx) -{ - if (atomic_dec_and_test(&ctx->ref)) - complete(&ctx->comp); -} - -static void ucma_close_event_id(struct work_struct *work) -{ - struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work); - - rdma_destroy_id(uevent_close->cm_id); - kfree(uevent_close); -} - -static void ucma_close_id(struct work_struct *work) -{ - struct ucma_context *ctx = container_of(work, struct ucma_context, close_work); - - /* Once all inflight tasks are finished, we close all underlying - * resources. The context stays alive until it is explicitly destroyed - * by its creator. - */ - ucma_put_ctx(ctx); - wait_for_completion(&ctx->comp); - /* No new events will be generated after destroying the id.
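- * By now the creation reference has been dropped and every concurrent
- * user has signalled ctx->comp, so the id can be destroyed safely.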
-	 */
-	rdma_destroy_id(ctx->cm_id);
-}
-
-static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
-{
-	struct ucma_context *ctx;
-
-	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
-	if (!ctx)
-		return NULL;
-
-	INIT_WORK(&ctx->close_work, ucma_close_id);
-	atomic_set(&ctx->ref, 1);
-	init_completion(&ctx->comp);
-	INIT_LIST_HEAD(&ctx->mc_list);
-	ctx->file = file;
-
-	mutex_lock(&mut);
-	ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
-	mutex_unlock(&mut);
-	if (ctx->id < 0)
-		goto error;
-
-	list_add_tail(&ctx->list, &file->ctx_list);
-	return ctx;
-
-error:
-	kfree(ctx);
-	return NULL;
-}
-
-static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
-{
-	struct ucma_multicast *mc;
-
-	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
-	if (!mc)
-		return NULL;
-
-	mutex_lock(&mut);
-	mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL);
-	mutex_unlock(&mut);
-	if (mc->id < 0)
-		goto error;
-
-	mc->ctx = ctx;
-	list_add_tail(&mc->list, &ctx->mc_list);
-	return mc;
-
-error:
-	kfree(mc);
-	return NULL;
-}
-
-static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
-				 struct rdma_conn_param *src)
-{
-	if (src->private_data_len)
-		memcpy(dst->private_data, src->private_data,
-		       src->private_data_len);
-	dst->private_data_len = src->private_data_len;
-	dst->responder_resources = src->responder_resources;
-	dst->initiator_depth = src->initiator_depth;
-	dst->flow_control = src->flow_control;
-	dst->retry_count = src->retry_count;
-	dst->rnr_retry_count = src->rnr_retry_count;
-	dst->srq = src->srq;
-	dst->qp_num = src->qp_num;
-}
-
-static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst,
-			       struct rdma_ud_param *src)
-{
-	if (src->private_data_len)
-		memcpy(dst->private_data, src->private_data,
-		       src->private_data_len);
-	dst->private_data_len = src->private_data_len;
-	ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
-	dst->qp_num = src->qp_num;
-	dst->qkey = src->qkey;
-}
-
-static void ucma_set_event_context(struct ucma_context *ctx,
-				   struct rdma_cm_event *event,
-				   struct ucma_event *uevent)
-{
-	uevent->ctx = ctx;
-	switch (event->event) {
-	case RDMA_CM_EVENT_MULTICAST_JOIN:
-	case RDMA_CM_EVENT_MULTICAST_ERROR:
-		uevent->mc = __DECONST(struct ucma_multicast *,
-		    event->param.ud.private_data);
-		uevent->resp.uid = uevent->mc->uid;
-		uevent->resp.id = uevent->mc->id;
-		break;
-	default:
-		uevent->resp.uid = ctx->uid;
-		uevent->resp.id = ctx->id;
-		break;
-	}
-}
-
-/* Called with file->mut locked for the relevant context. */
-static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
-{
-	struct ucma_context *ctx = cm_id->context;
-	struct ucma_event *con_req_eve;
-	int event_found = 0;
-
-	if (ctx->destroying)
-		return;
-
-	/* Only if the context still points to this cm_id does it own it and
-	 * can it be queued to be closed; otherwise the cm_id is an inflight
-	 * one that is part of that context's event list, pending to be
-	 * detached and reattached to its new context as part of
-	 * ucma_get_event, handled separately below.
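-	 *
-	 * Concretely: a cm_id created for a CONNECT_REQUEST still has
-	 * cm_id->context pointing at the listening context until
-	 * ucma_get_event() allocates a fresh ucma_context and repoints
-	 * cm_id->context at it, so ctx->cm_id == cm_id holds only for
-	 * ids this context actually owns.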
- */ - if (ctx->cm_id == cm_id) { - mutex_lock(&mut); - ctx->closing = 1; - mutex_unlock(&mut); - queue_work(ctx->file->close_wq, &ctx->close_work); - return; - } - - list_for_each_entry(con_req_eve, &ctx->file->event_list, list) { - if (con_req_eve->cm_id == cm_id && - con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { - list_del(&con_req_eve->list); - INIT_WORK(&con_req_eve->close_work, ucma_close_event_id); - queue_work(ctx->file->close_wq, &con_req_eve->close_work); - event_found = 1; - break; - } - } - if (!event_found) - pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n"); -} - -static int ucma_event_handler(struct rdma_cm_id *cm_id, - struct rdma_cm_event *event) -{ - struct ucma_event *uevent; - struct ucma_context *ctx = cm_id->context; - int ret = 0; - - uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); - if (!uevent) - return event->event == RDMA_CM_EVENT_CONNECT_REQUEST; - - mutex_lock(&ctx->file->mut); - uevent->cm_id = cm_id; - ucma_set_event_context(ctx, event, uevent); - uevent->resp.event = event->event; - uevent->resp.status = event->status; - if (cm_id->qp_type == IB_QPT_UD) - ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); - else - ucma_copy_conn_event(&uevent->resp.param.conn, - &event->param.conn); - - if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { - if (!ctx->backlog) { - ret = -ENOMEM; - kfree(uevent); - goto out; - } - ctx->backlog--; - } else if (!ctx->uid || ctx->cm_id != cm_id) { - /* - * We ignore events for new connections until userspace has set - * their context. This can only happen if an error occurs on a - * new connection before the user accepts it. This is okay, - * since the accept will just fail later. However, we do need - * to release the underlying HW resources in case of a device - * removal event. 
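-	 *
-	 * (Contrast this with the allocation-failure path at the top of
-	 * this function, which returns non-zero for a CONNECT_REQUEST so
-	 * that the rdma_cm core destroys the nascent cm_id instead of
-	 * leaking it.)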
- */ - if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) - ucma_removal_event_handler(cm_id); - - kfree(uevent); - goto out; - } - - list_add_tail(&uevent->list, &ctx->file->event_list); - wake_up_interruptible(&ctx->file->poll_wait); - linux_poll_wakeup(ctx->file->filp); - if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) - ucma_removal_event_handler(cm_id); -out: - mutex_unlock(&ctx->file->mut); - return ret; -} - -static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, - int in_len, int out_len) -{ - struct ucma_context *ctx; - struct rdma_ucm_get_event cmd; - struct ucma_event *uevent; - int ret = 0; - - if (out_len < sizeof uevent->resp) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - mutex_lock(&file->mut); - while (list_empty(&file->event_list)) { - mutex_unlock(&file->mut); - - if (file->filp->f_flags & O_NONBLOCK) - return -EAGAIN; - - if (wait_event_interruptible(file->poll_wait, - !list_empty(&file->event_list))) - return -ERESTARTSYS; - - mutex_lock(&file->mut); - } - - uevent = list_entry(file->event_list.next, struct ucma_event, list); - - if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { - ctx = ucma_alloc_ctx(file); - if (!ctx) { - ret = -ENOMEM; - goto done; - } - uevent->ctx->backlog++; - ctx->cm_id = uevent->cm_id; - ctx->cm_id->context = ctx; - uevent->resp.id = ctx->id; - } - - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &uevent->resp, sizeof uevent->resp)) { - ret = -EFAULT; - goto done; - } - - list_del(&uevent->list); - uevent->ctx->events_reported++; - if (uevent->mc) - uevent->mc->events_reported++; - kfree(uevent); -done: - mutex_unlock(&file->mut); - return ret; -} - -static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type) -{ - switch (cmd->ps) { - case RDMA_PS_TCP: - *qp_type = IB_QPT_RC; - return 0; - case RDMA_PS_UDP: - case RDMA_PS_IPOIB: - *qp_type = IB_QPT_UD; - return 0; - case RDMA_PS_IB: - *qp_type = cmd->qp_type; - return 0; - default: - return -EINVAL; - } -} - -static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_create_id cmd; - struct rdma_ucm_create_id_resp resp; - struct ucma_context *ctx; - enum ib_qp_type qp_type; - int ret; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ret = ucma_get_qp_type(&cmd, &qp_type); - if (ret) - return ret; - - mutex_lock(&file->mut); - ctx = ucma_alloc_ctx(file); - mutex_unlock(&file->mut); - if (!ctx) - return -ENOMEM; - - ctx->uid = cmd.uid; - ctx->cm_id = rdma_create_id(TD_TO_VNET(curthread), - ucma_event_handler, ctx, cmd.ps, qp_type); - if (IS_ERR(ctx->cm_id)) { - ret = PTR_ERR(ctx->cm_id); - goto err1; - } - - resp.id = ctx->id; - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) { - ret = -EFAULT; - goto err2; - } - return 0; - -err2: - rdma_destroy_id(ctx->cm_id); -err1: - mutex_lock(&mut); - idr_remove(&ctx_idr, ctx->id); - mutex_unlock(&mut); - kfree(ctx); - return ret; -} - -static void ucma_cleanup_multicast(struct ucma_context *ctx) -{ - struct ucma_multicast *mc, *tmp; - - mutex_lock(&mut); - list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { - list_del(&mc->list); - idr_remove(&multicast_idr, mc->id); - kfree(mc); - } - mutex_unlock(&mut); -} - -static void ucma_cleanup_mc_events(struct ucma_multicast *mc) -{ - struct ucma_event *uevent, *tmp; - - list_for_each_entry_safe(uevent, tmp, 
&mc->ctx->file->event_list, list) { - if (uevent->mc != mc) - continue; - - list_del(&uevent->list); - kfree(uevent); - } -} - -/* - * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At - * this point, no new events will be reported from the hardware. However, we - * still need to cleanup the UCMA context for this ID. Specifically, there - * might be events that have not yet been consumed by the user space software. - * These might include pending connect requests which we have not completed - * processing. We cannot call rdma_destroy_id while holding the lock of the - * context (file->mut), as it might cause a deadlock. We therefore extract all - * relevant events from the context pending events list while holding the - * mutex. After that we release them as needed. - */ -static int ucma_free_ctx(struct ucma_context *ctx) -{ - int events_reported; - struct ucma_event *uevent, *tmp; - LIST_HEAD(list); - - - ucma_cleanup_multicast(ctx); - - /* Cleanup events not yet reported to the user. */ - mutex_lock(&ctx->file->mut); - list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { - if (uevent->ctx == ctx) - list_move_tail(&uevent->list, &list); - } - list_del(&ctx->list); - mutex_unlock(&ctx->file->mut); - - list_for_each_entry_safe(uevent, tmp, &list, list) { - list_del(&uevent->list); - if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) - rdma_destroy_id(uevent->cm_id); - kfree(uevent); - } - - events_reported = ctx->events_reported; - kfree(ctx); - return events_reported; -} - -static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_destroy_id cmd; - struct rdma_ucm_destroy_id_resp resp; - struct ucma_context *ctx; - int ret = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - mutex_lock(&mut); - ctx = _ucma_find_context(cmd.id, file); - if (!IS_ERR(ctx)) - idr_remove(&ctx_idr, ctx->id); - mutex_unlock(&mut); - - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - mutex_lock(&ctx->file->mut); - ctx->destroying = 1; - mutex_unlock(&ctx->file->mut); - - flush_workqueue(ctx->file->close_wq); - /* At this point it's guaranteed that there is no inflight - * closing task */ - mutex_lock(&mut); - if (!ctx->closing) { - mutex_unlock(&mut); - ucma_put_ctx(ctx); - wait_for_completion(&ctx->comp); - rdma_destroy_id(ctx->cm_id); - } else { - mutex_unlock(&mut); - } - - resp.events_reported = ucma_free_ctx(ctx); - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) - ret = -EFAULT; - - return ret; -} - -static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_bind_ip cmd; - struct ucma_context *ctx; - int ret; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ucma_get_ctx(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); - ucma_put_ctx(ctx); - return ret; -} - -static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_bind cmd; - struct sockaddr *addr; - struct ucma_context *ctx; - int ret; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - addr = (struct sockaddr *) &cmd.addr; - if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr))) - return -EINVAL; - - ctx = ucma_get_ctx(file, cmd.id); - if (IS_ERR(ctx)) - 
return PTR_ERR(ctx); - - ret = rdma_bind_addr(ctx->cm_id, addr); - ucma_put_ctx(ctx); - return ret; -} - -static ssize_t ucma_resolve_ip(struct ucma_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_resolve_ip cmd; - struct ucma_context *ctx; - int ret; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ucma_get_ctx(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, - (struct sockaddr *) &cmd.dst_addr, - cmd.timeout_ms); - ucma_put_ctx(ctx); - return ret; -} - -static ssize_t ucma_resolve_addr(struct ucma_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_resolve_addr cmd; - struct sockaddr *src, *dst; - struct ucma_context *ctx; - int ret; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - src = (struct sockaddr *) &cmd.src_addr; - dst = (struct sockaddr *) &cmd.dst_addr; - if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) || - !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst))) - return -EINVAL; - - ctx = ucma_get_ctx(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms); - ucma_put_ctx(ctx); - return ret; -} - -static ssize_t ucma_resolve_route(struct ucma_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_resolve_route cmd; - struct ucma_context *ctx; - int ret; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ucma_get_ctx(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); - ucma_put_ctx(ctx); - return ret; -} - -static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, - struct rdma_route *route) -{ - struct rdma_dev_addr *dev_addr; - - resp->num_paths = route->num_paths; - switch (route->num_paths) { - case 0: - dev_addr = &route->addr.dev_addr; - rdma_addr_get_dgid(dev_addr, - (union ib_gid *) &resp->ib_route[0].dgid); - rdma_addr_get_sgid(dev_addr, - (union ib_gid *) &resp->ib_route[0].sgid); - resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); - break; - case 2: - ib_copy_path_rec_to_user(&resp->ib_route[1], - &route->path_rec[1]); - /* fall through */ - case 1: - ib_copy_path_rec_to_user(&resp->ib_route[0], - &route->path_rec[0]); - break; - default: - break; - } -} - -static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, - struct rdma_route *route) -{ - - resp->num_paths = route->num_paths; - switch (route->num_paths) { - case 0: - rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, - (union ib_gid *)&resp->ib_route[0].dgid); - rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, - (union ib_gid *)&resp->ib_route[0].sgid); - resp->ib_route[0].pkey = cpu_to_be16(0xffff); - break; - case 2: - ib_copy_path_rec_to_user(&resp->ib_route[1], - &route->path_rec[1]); - /* fall through */ - case 1: - ib_copy_path_rec_to_user(&resp->ib_route[0], - &route->path_rec[0]); - break; - default: - break; - } -} - -static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp, - struct rdma_route *route) -{ - struct rdma_dev_addr *dev_addr; - - dev_addr = &route->addr.dev_addr; - rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); - rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); -} - -static ssize_t ucma_query_route(struct ucma_file *file, - const char __user *inbuf, - 
int in_len, int out_len) -{ - struct rdma_ucm_query cmd; - struct rdma_ucm_query_route_resp resp; - struct ucma_context *ctx; - struct sockaddr *addr; - int ret = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ucma_get_ctx(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - memset(&resp, 0, sizeof resp); - addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; - memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? - sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)); - addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; - memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ? - sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)); - if (!ctx->cm_id->device) - goto out; - - resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; - resp.port_num = ctx->cm_id->port_num; - - if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) - ucma_copy_ib_route(&resp, &ctx->cm_id->route); - else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num)) - ucma_copy_iboe_route(&resp, &ctx->cm_id->route); - else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num)) - ucma_copy_iw_route(&resp, &ctx->cm_id->route); - -out: - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) - ret = -EFAULT; - - ucma_put_ctx(ctx); - return ret; -} - -static void ucma_query_device_addr(struct rdma_cm_id *cm_id, - struct rdma_ucm_query_addr_resp *resp) -{ - if (!cm_id->device) - return; - - resp->node_guid = (__force __u64) cm_id->device->node_guid; - resp->port_num = cm_id->port_num; - resp->pkey = (__force __u16) cpu_to_be16( - ib_addr_get_pkey(&cm_id->route.addr.dev_addr)); -} - -static ssize_t ucma_query_addr(struct ucma_context *ctx, - void __user *response, int out_len) -{ - struct rdma_ucm_query_addr_resp resp; - struct sockaddr *addr; - int ret = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - memset(&resp, 0, sizeof resp); - - addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; - resp.src_size = rdma_addr_size(addr); - memcpy(&resp.src_addr, addr, resp.src_size); - - addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; - resp.dst_size = rdma_addr_size(addr); - memcpy(&resp.dst_addr, addr, resp.dst_size); - - ucma_query_device_addr(ctx->cm_id, &resp); - - if (copy_to_user(response, &resp, sizeof(resp))) - ret = -EFAULT; - - return ret; -} - -static ssize_t ucma_query_path(struct ucma_context *ctx, - void __user *response, int out_len) -{ - struct rdma_ucm_query_path_resp *resp; - int i, ret = 0; - - if (out_len < sizeof(*resp)) - return -ENOSPC; - - resp = kzalloc(out_len, GFP_KERNEL); - if (!resp) - return -ENOMEM; - - resp->num_paths = ctx->cm_id->route.num_paths; - for (i = 0, out_len -= sizeof(*resp); - i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); - i++, out_len -= sizeof(struct ib_path_rec_data)) { - - resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | - IB_PATH_BIDIRECTIONAL; - ib_sa_pack_path(&ctx->cm_id->route.path_rec[i], - &resp->path_data[i].path_rec); - } - - if (copy_to_user(response, resp, - sizeof(*resp) + (i * sizeof(struct ib_path_rec_data)))) - ret = -EFAULT; - - kfree(resp); - return ret; -} - -static ssize_t ucma_query_gid(struct ucma_context *ctx, - void __user *response, int out_len) -{ - struct rdma_ucm_query_addr_resp resp; - struct sockaddr_ib *addr; - int ret = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - memset(&resp, 0, sizeof 
resp);
-
-	ucma_query_device_addr(ctx->cm_id, &resp);
-
-	addr = (struct sockaddr_ib *) &resp.src_addr;
-	resp.src_size = sizeof(*addr);
-	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
-		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
-	} else {
-		addr->sib_family = AF_IB;
-		addr->sib_pkey = (__force __be16) resp.pkey;
-		rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
-				   (union ib_gid *) &addr->sib_addr);
-		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
-						    &ctx->cm_id->route.addr.src_addr);
-	}
-
-	addr = (struct sockaddr_ib *) &resp.dst_addr;
-	resp.dst_size = sizeof(*addr);
-	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
-		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
-	} else {
-		addr->sib_family = AF_IB;
-		addr->sib_pkey = (__force __be16) resp.pkey;
-		rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
-				   (union ib_gid *) &addr->sib_addr);
-		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
-						    &ctx->cm_id->route.addr.dst_addr);
-	}
-
-	if (copy_to_user(response, &resp, sizeof(resp)))
-		ret = -EFAULT;
-
-	return ret;
-}
-
-static ssize_t ucma_query(struct ucma_file *file,
-			  const char __user *inbuf,
-			  int in_len, int out_len)
-{
-	struct rdma_ucm_query cmd;
-	struct ucma_context *ctx;
-	void __user *response;
-	int ret;
-
-	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
-		return -EFAULT;
-
-	response = (void __user *)(unsigned long) cmd.response;
-	ctx = ucma_get_ctx(file, cmd.id);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
-
-	switch (cmd.option) {
-	case RDMA_USER_CM_QUERY_ADDR:
-		ret = ucma_query_addr(ctx, response, out_len);
-		break;
-	case RDMA_USER_CM_QUERY_PATH:
-		ret = ucma_query_path(ctx, response, out_len);
-		break;
-	case RDMA_USER_CM_QUERY_GID:
-		ret = ucma_query_gid(ctx, response, out_len);
-		break;
-	default:
-		ret = -ENOSYS;
-		break;
-	}
-
-	ucma_put_ctx(ctx);
-	return ret;
-}
-
-static void ucma_copy_conn_param(struct rdma_cm_id *id,
-				 struct rdma_conn_param *dst,
-				 struct rdma_ucm_conn_param *src)
-{
-	dst->private_data = src->private_data;
-	dst->private_data_len = src->private_data_len;
-	dst->responder_resources = src->responder_resources;
-	dst->initiator_depth = src->initiator_depth;
-	dst->flow_control = src->flow_control;
-	dst->retry_count = src->retry_count;
-	dst->rnr_retry_count = src->rnr_retry_count;
-	dst->srq = src->srq;
-	dst->qp_num = src->qp_num;
-	dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
-}
-
-static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
-			    int in_len, int out_len)
-{
-	struct rdma_ucm_connect cmd;
-	struct rdma_conn_param conn_param;
-	struct ucma_context *ctx;
-	int ret;
-
-	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
-		return -EFAULT;
-
-	if (!cmd.conn_param.valid)
-		return -EINVAL;
-
-	ctx = ucma_get_ctx(file, cmd.id);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
-
-	ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
-	ret = rdma_connect(ctx->cm_id, &conn_param);
-	ucma_put_ctx(ctx);
-	return ret;
-}
-
-static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
-			   int in_len, int out_len)
-{
-	struct rdma_ucm_listen cmd;
-	struct ucma_context *ctx;
-	int ret;
-
-	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
-		return -EFAULT;
-
-	ctx = ucma_get_ctx(file, cmd.id);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
-
-	ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
- cmd.backlog : max_backlog; - ret = rdma_listen(ctx->cm_id, ctx->backlog); - ucma_put_ctx(ctx); - return ret; -} - -static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_accept cmd; - struct rdma_conn_param conn_param; - struct ucma_context *ctx; - int ret; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ucma_get_ctx(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - if (cmd.conn_param.valid) { - ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); - mutex_lock(&file->mut); - ret = rdma_accept(ctx->cm_id, &conn_param); - if (!ret) - ctx->uid = cmd.uid; - mutex_unlock(&file->mut); - } else - ret = rdma_accept(ctx->cm_id, NULL); - - ucma_put_ctx(ctx); - return ret; -} - -static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_reject cmd; - struct ucma_context *ctx; - int ret; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ucma_get_ctx(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len); - ucma_put_ctx(ctx); - return ret; -} - -static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_disconnect cmd; - struct ucma_context *ctx; - int ret; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ucma_get_ctx(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ret = rdma_disconnect(ctx->cm_id); - ucma_put_ctx(ctx); - return ret; -} - -static ssize_t ucma_init_qp_attr(struct ucma_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_init_qp_attr cmd; - struct ib_uverbs_qp_attr resp; - struct ucma_context *ctx; - struct ib_qp_attr qp_attr; - int ret; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ucma_get_ctx(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - resp.qp_attr_mask = 0; - memset(&qp_attr, 0, sizeof qp_attr); - qp_attr.qp_state = cmd.qp_state; - ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); - if (ret) - goto out; - - ib_copy_qp_attr_to_user(&resp, &qp_attr); - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) - ret = -EFAULT; - -out: - ucma_put_ctx(ctx); - return ret; -} - -static int ucma_set_option_id(struct ucma_context *ctx, int optname, - void *optval, size_t optlen) -{ - int ret = 0; - - switch (optname) { - case RDMA_OPTION_ID_TOS: - if (optlen != sizeof(u8)) { - ret = -EINVAL; - break; - } - rdma_set_service_type(ctx->cm_id, *((u8 *) optval)); - break; - case RDMA_OPTION_ID_REUSEADDR: - if (optlen != sizeof(int)) { - ret = -EINVAL; - break; - } - ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); - break; - case RDMA_OPTION_ID_AFONLY: - if (optlen != sizeof(int)) { - ret = -EINVAL; - break; - } - ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 
1 : 0); - break; - default: - ret = -ENOSYS; - } - - return ret; -} - -static int ucma_set_ib_path(struct ucma_context *ctx, - struct ib_path_rec_data *path_data, size_t optlen) -{ - struct ib_sa_path_rec sa_path; - struct rdma_cm_event event; - int ret; - - if (optlen % sizeof(*path_data)) - return -EINVAL; - - for (; optlen; optlen -= sizeof(*path_data), path_data++) { - if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY | - IB_PATH_BIDIRECTIONAL)) - break; - } - - if (!optlen) - return -EINVAL; - - memset(&sa_path, 0, sizeof(sa_path)); - - ib_sa_unpack_path(path_data->path_rec, &sa_path); - ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); - if (ret) - return ret; - - memset(&event, 0, sizeof event); - event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; - return ucma_event_handler(ctx->cm_id, &event); -} - -static int ucma_set_option_ib(struct ucma_context *ctx, int optname, - void *optval, size_t optlen) -{ - int ret; - - switch (optname) { - case RDMA_OPTION_IB_PATH: - ret = ucma_set_ib_path(ctx, optval, optlen); - break; - default: - ret = -ENOSYS; - } - - return ret; -} - -static int ucma_set_option_level(struct ucma_context *ctx, int level, - int optname, void *optval, size_t optlen) -{ - int ret; - - switch (level) { - case RDMA_OPTION_ID: - ret = ucma_set_option_id(ctx, optname, optval, optlen); - break; - case RDMA_OPTION_IB: - ret = ucma_set_option_ib(ctx, optname, optval, optlen); - break; - default: - ret = -ENOSYS; - } - - return ret; -} - -static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_set_option cmd; - struct ucma_context *ctx; - void *optval; - int ret; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ucma_get_ctx(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - optval = memdup_user((void __user *) (unsigned long) cmd.optval, - cmd.optlen); - if (IS_ERR(optval)) { - ret = PTR_ERR(optval); - goto out; - } - - ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, - cmd.optlen); - kfree(optval); - -out: - ucma_put_ctx(ctx); - return ret; -} - -static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_notify cmd; - struct ucma_context *ctx; - int ret; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ucma_get_ctx(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event); - ucma_put_ctx(ctx); - return ret; -} - -static ssize_t ucma_process_join(struct ucma_file *file, - struct rdma_ucm_join_mcast *cmd, int out_len) -{ - struct rdma_ucm_create_id_resp resp; - struct ucma_context *ctx; - struct ucma_multicast *mc; - struct sockaddr *addr; - int ret; - u8 join_state; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - addr = (struct sockaddr *) &cmd->addr; - if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) - return -EINVAL; - - if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) - join_state = BIT(FULLMEMBER_JOIN); - else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) - join_state = BIT(SENDONLY_FULLMEMBER_JOIN); - else - return -EINVAL; - - ctx = ucma_get_ctx(file, cmd->id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - mutex_lock(&file->mut); - mc = ucma_alloc_multicast(ctx); - if (!mc) { - ret = -ENOMEM; - goto err1; - } - mc->join_state = join_state; - mc->uid = cmd->uid; - memcpy(&mc->addr, addr, cmd->addr_size); - ret = rdma_join_multicast(ctx->cm_id, 
(struct sockaddr *)&mc->addr,
-				   join_state, mc);
-	if (ret)
-		goto err2;
-
-	resp.id = mc->id;
-	if (copy_to_user((void __user *)(unsigned long) cmd->response,
-			 &resp, sizeof(resp))) {
-		ret = -EFAULT;
-		goto err3;
-	}
-
-	mutex_unlock(&file->mut);
-	ucma_put_ctx(ctx);
-	return 0;
-
-err3:
-	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
-	ucma_cleanup_mc_events(mc);
-err2:
-	mutex_lock(&mut);
-	idr_remove(&multicast_idr, mc->id);
-	mutex_unlock(&mut);
-	list_del(&mc->list);
-	kfree(mc);
-err1:
-	mutex_unlock(&file->mut);
-	ucma_put_ctx(ctx);
-	return ret;
-}
-
-static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
-				      const char __user *inbuf,
-				      int in_len, int out_len)
-{
-	struct rdma_ucm_join_ip_mcast cmd;
-	struct rdma_ucm_join_mcast join_cmd;
-
-	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
-		return -EFAULT;
-
-	join_cmd.response = cmd.response;
-	join_cmd.uid = cmd.uid;
-	join_cmd.id = cmd.id;
-	join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
-	join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
-	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
-
-	return ucma_process_join(file, &join_cmd, out_len);
-}
-
-static ssize_t ucma_join_multicast(struct ucma_file *file,
-				   const char __user *inbuf,
-				   int in_len, int out_len)
-{
-	struct rdma_ucm_join_mcast cmd;
-
-	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
-		return -EFAULT;
-
-	return ucma_process_join(file, &cmd, out_len);
-}
-
-static ssize_t ucma_leave_multicast(struct ucma_file *file,
-				    const char __user *inbuf,
-				    int in_len, int out_len)
-{
-	struct rdma_ucm_destroy_id cmd;
-	struct rdma_ucm_destroy_id_resp resp;
-	struct ucma_multicast *mc;
-	int ret = 0;
-
-	if (out_len < sizeof(resp))
-		return -ENOSPC;
-
-	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
-		return -EFAULT;
-
-	mutex_lock(&mut);
-	mc = idr_find(&multicast_idr, cmd.id);
-	if (!mc)
-		mc = ERR_PTR(-ENOENT);
-	else if (mc->ctx->file != file)
-		mc = ERR_PTR(-EINVAL);
-	else if (!atomic_inc_not_zero(&mc->ctx->ref))
-		mc = ERR_PTR(-ENXIO);
-	else
-		idr_remove(&multicast_idr, mc->id);
-	mutex_unlock(&mut);
-
-	if (IS_ERR(mc)) {
-		ret = PTR_ERR(mc);
-		goto out;
-	}
-
-	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
-	mutex_lock(&mc->ctx->file->mut);
-	ucma_cleanup_mc_events(mc);
-	list_del(&mc->list);
-	mutex_unlock(&mc->ctx->file->mut);
-
-	ucma_put_ctx(mc->ctx);
-	resp.events_reported = mc->events_reported;
-	kfree(mc);
-
-	if (copy_to_user((void __user *)(unsigned long)cmd.response,
-			 &resp, sizeof(resp)))
-		ret = -EFAULT;
-out:
-	return ret;
-}
-
-static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
-	/* Acquire mutexes based on pointer comparison to prevent deadlock.
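-	 *
-	 * Whichever file has the lower address is always locked first, so
-	 * for any pair of files both lock paths agree on a single global
-	 * order; in sketch form:
-	 *
-	 *	mutex_lock(&min(file1, file2)->mut);
-	 *	mutex_lock_nested(&max(file1, file2)->mut,
-	 *			  SINGLE_DEPTH_NESTING);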
*/ - if (file1 < file2) { - mutex_lock(&file1->mut); - mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING); - } else { - mutex_lock(&file2->mut); - mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING); - } -} - -static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2) -{ - if (file1 < file2) { - mutex_unlock(&file2->mut); - mutex_unlock(&file1->mut); - } else { - mutex_unlock(&file1->mut); - mutex_unlock(&file2->mut); - } -} - -static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file) -{ - struct ucma_event *uevent, *tmp; - - list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) - if (uevent->ctx == ctx) - list_move_tail(&uevent->list, &file->event_list); -} - -static ssize_t ucma_migrate_id(struct ucma_file *new_file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct rdma_ucm_migrate_id cmd; - struct rdma_ucm_migrate_resp resp; - struct ucma_context *ctx; - struct fd f; - struct ucma_file *cur_file; - int ret = 0; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - /* Get current fd to protect against it being closed */ - f = fdget(cmd.fd); - if (!f.file) - return -ENOENT; - - /* Validate current fd and prevent destruction of id. */ - ctx = ucma_get_ctx(f.file->private_data, cmd.id); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto file_put; - } - - cur_file = ctx->file; - if (cur_file == new_file) { - resp.events_reported = ctx->events_reported; - goto response; - } - - /* - * Migrate events between fd's, maintaining order, and avoiding new - * events being added before existing events. - */ - ucma_lock_files(cur_file, new_file); - mutex_lock(&mut); - - list_move_tail(&ctx->list, &new_file->ctx_list); - ucma_move_events(ctx, new_file); - ctx->file = new_file; - resp.events_reported = ctx->events_reported; - - mutex_unlock(&mut); - ucma_unlock_files(cur_file, new_file); - -response: - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) - ret = -EFAULT; - - ucma_put_ctx(ctx); -file_put: - fdput(f); - return ret; -} - -static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, - const char __user *inbuf, - int in_len, int out_len) = { - [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, - [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, - [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip, - [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip, - [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route, - [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, - [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, - [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, - [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, - [RDMA_USER_CM_CMD_REJECT] = ucma_reject, - [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, - [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, - [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, - [RDMA_USER_CM_CMD_GET_OPTION] = NULL, - [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, - [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, - [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast, - [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, - [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id, - [RDMA_USER_CM_CMD_QUERY] = ucma_query, - [RDMA_USER_CM_CMD_BIND] = ucma_bind, - [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, - [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast -}; - -static ssize_t ucma_write(struct file *filp, const char __user *buf, - size_t len, loff_t *pos) -{ - struct ucma_file *file = filp->private_data; - struct rdma_ucm_cmd_hdr hdr; - ssize_t ret; - - if 
(WARN_ON_ONCE(!ib_safe_file_access(filp)))
-		return -EACCES;
-
-	if (len < sizeof(hdr))
-		return -EINVAL;
-
-	if (copy_from_user(&hdr, buf, sizeof(hdr)))
-		return -EFAULT;
-
-	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
-		return -EINVAL;
-
-	if (hdr.in + sizeof(hdr) > len)
-		return -EINVAL;
-
-	if (!ucma_cmd_table[hdr.cmd])
-		return -ENOSYS;
-
-	ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
-	if (!ret)
-		ret = len;
-
-	return ret;
-}
-
-static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait)
-{
-	struct ucma_file *file = filp->private_data;
-	unsigned int mask = 0;
-
-	poll_wait(filp, &file->poll_wait, wait);
-
-	if (!list_empty(&file->event_list))
-		mask = POLLIN | POLLRDNORM;
-
-	return mask;
-}
-
-/*
- * ucma_open() does not need the BKL:
- *
- *  - no global state is referred to;
- *  - there is no ioctl method to race against;
- *  - no further module initialization is required for open to work
- *    after the device is registered.
- */
-static int ucma_open(struct inode *inode, struct file *filp)
-{
-	struct ucma_file *file;
-
-	file = kmalloc(sizeof *file, GFP_KERNEL);
-	if (!file)
-		return -ENOMEM;
-
-	file->close_wq = alloc_ordered_workqueue("ucma_close_id",
-						 WQ_MEM_RECLAIM);
-	if (!file->close_wq) {
-		kfree(file);
-		return -ENOMEM;
-	}
-
-	INIT_LIST_HEAD(&file->event_list);
-	INIT_LIST_HEAD(&file->ctx_list);
-	init_waitqueue_head(&file->poll_wait);
-	mutex_init(&file->mut);
-
-	filp->private_data = file;
-	file->filp = filp;
-
-	return nonseekable_open(inode, filp);
-}
-
-static int ucma_close(struct inode *inode, struct file *filp)
-{
-	struct ucma_file *file = filp->private_data;
-	struct ucma_context *ctx, *tmp;
-
-	mutex_lock(&file->mut);
-	list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
-		ctx->destroying = 1;
-		mutex_unlock(&file->mut);
-
-		mutex_lock(&mut);
-		idr_remove(&ctx_idr, ctx->id);
-		mutex_unlock(&mut);
-
-		flush_workqueue(file->close_wq);
-		/* At this point, once the ctx has been marked as destroying
-		 * and the workqueue has been flushed, we are safe from any
-		 * inflight handlers that might queue another closing task.
-		 */
-		mutex_lock(&mut);
-		if (!ctx->closing) {
-			mutex_unlock(&mut);
-			/* rdma_destroy_id ensures that no event handlers are
-			 * inflight for that id before releasing it.
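-			 *
-			 * The overall teardown order is thus: mark the ctx
-			 * destroying, remove it from the idr, flush close_wq,
-			 * and only then destroy the cm_id, either directly
-			 * here or via the already-queued ucma_close_id()
-			 * work when ctx->closing was set.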
- */ - rdma_destroy_id(ctx->cm_id); - } else { - mutex_unlock(&mut); - } - - ucma_free_ctx(ctx); - mutex_lock(&file->mut); - } - mutex_unlock(&file->mut); - destroy_workqueue(file->close_wq); - kfree(file); - return 0; -} - -static long -ucma_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - - switch (cmd) { - case FIONBIO: - case FIOASYNC: - return (0); - default: - return (-ENOTTY); - } -} - -static const struct file_operations ucma_fops = { - .owner = THIS_MODULE, - .open = ucma_open, - .release = ucma_close, - .write = ucma_write, - .unlocked_ioctl = ucma_ioctl, - .poll = ucma_poll, - .llseek = no_llseek, -}; - -static struct miscdevice ucma_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = "rdma_cm", - .nodename = "infiniband/rdma_cm", - .mode = 0666, - .fops = &ucma_fops, -}; - -static ssize_t show_abi_version(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); -} -static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); - -static int __init ucma_init(void) -{ - int ret; - - ret = misc_register(&ucma_misc); - if (ret) - return ret; - - ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); - if (ret) { - pr_err("rdma_ucm: couldn't create abi_version attr\n"); - goto err1; - } - - return 0; -err1: - misc_deregister(&ucma_misc); - return ret; -} - -static void __exit ucma_cleanup(void) -{ - device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); - misc_deregister(&ucma_misc); - idr_destroy(&ctx_idr); - idr_destroy(&multicast_idr); -} - -module_init(ucma_init); -module_exit(ucma_cleanup); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ucma.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs_cmd.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs_cmd.c (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs_cmd.c (nonexistent) @@ -1,4251 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. - * Copyright (c) 2005 PathScale, Inc. All rights reserved. - * Copyright (c) 2006 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#define LINUXKPI_PARAM_PREFIX ibcore_
-
-#include
-#include
-#include
-#include
-#include
-
-#include
-
-#include "uverbs.h"
-#include "core_priv.h"
-
-#include
-
-struct uverbs_lock_class {
-	char name[16];
-};
-
-static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" };
-static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
-static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" };
-static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
-static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
-static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
-static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
-static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
-static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
-static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
-
-/*
- * The ib_uobject locking scheme is as follows:
- *
- * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
- *   needs to be held during all idr write operations. When an object is
- *   looked up, a reference must be taken on the object's kref before
- *   dropping this lock. For read operations, rcu_read_lock() is used
- *   instead, but similarly the kref reference is grabbed before the
- *   rcu_read_unlock().
- *
- * - Each object also has an rwsem. This rwsem must be held for
- *   reading while an operation that uses the object is performed.
- *   For example, while registering an MR, the associated PD's
- *   uobject.mutex must be held for reading. The rwsem must be held
- *   for writing while initializing or destroying an object.
- *
- * - In addition, each object has a "live" flag. If this flag is not
- *   set, then lookups of the object will fail even if it is found in
- *   the idr. This handles a reader that blocks and does not acquire
- *   the rwsem until after the object is destroyed. The destroy
- *   operation will set the live flag to 0 and then drop the rwsem;
- *   this will allow the reader to acquire the rwsem, see that the
- *   live flag is 0, and then drop the rwsem and its reference to the
- *   object. The underlying storage will not be freed until the last
- *   reference to the object is dropped.
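- *
- * The read-side lookup therefore has the following shape (a sketch of
- * __idr_get_uobj() and idr_read_uobj() below):
- *
- *	rcu_read_lock();
- *	uobj = idr_find(idr, id);
- *	if (uobj)
- *		kref_get(&uobj->ref);
- *	rcu_read_unlock();
- *
- *	down_read(&uobj->mutex);
- *	if (!uobj->live)
- *		fail the lookup;	(object is being destroyed)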
- */ - -static void init_uobj(struct ib_uobject *uobj, u64 user_handle, - struct ib_ucontext *context, struct uverbs_lock_class *c) -{ - uobj->user_handle = user_handle; - uobj->context = context; - kref_init(&uobj->ref); - init_rwsem(&uobj->mutex); - uobj->live = 0; -} - -static void release_uobj(struct kref *kref) -{ - kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu); -} - -static void put_uobj(struct ib_uobject *uobj) -{ - kref_put(&uobj->ref, release_uobj); -} - -static void put_uobj_read(struct ib_uobject *uobj) -{ - up_read(&uobj->mutex); - put_uobj(uobj); -} - -static void put_uobj_write(struct ib_uobject *uobj) -{ - up_write(&uobj->mutex); - put_uobj(uobj); -} - -static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) -{ - int ret; - - idr_preload(GFP_KERNEL); - spin_lock(&ib_uverbs_idr_lock); - - ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT); - if (ret >= 0) - uobj->id = ret; - - spin_unlock(&ib_uverbs_idr_lock); - idr_preload_end(); - - return ret < 0 ? ret : 0; -} - -void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) -{ - spin_lock(&ib_uverbs_idr_lock); - idr_remove(idr, uobj->id); - spin_unlock(&ib_uverbs_idr_lock); -} - -static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, - struct ib_ucontext *context) -{ - struct ib_uobject *uobj; - - rcu_read_lock(); - uobj = idr_find(idr, id); - if (uobj) { - if (uobj->context == context) - kref_get(&uobj->ref); - else - uobj = NULL; - } - rcu_read_unlock(); - - return uobj; -} - -static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, - struct ib_ucontext *context, int nested) -{ - struct ib_uobject *uobj; - - uobj = __idr_get_uobj(idr, id, context); - if (!uobj) - return NULL; - - if (nested) - down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING); - else - down_read(&uobj->mutex); - if (!uobj->live) { - put_uobj_read(uobj); - return NULL; - } - - return uobj; -} - -static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, - struct ib_ucontext *context) -{ - struct ib_uobject *uobj; - - uobj = __idr_get_uobj(idr, id, context); - if (!uobj) - return NULL; - - down_write(&uobj->mutex); - if (!uobj->live) { - put_uobj_write(uobj); - return NULL; - } - - return uobj; -} - -static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context, - int nested) -{ - struct ib_uobject *uobj; - - uobj = idr_read_uobj(idr, id, context, nested); - return uobj ? 
uobj->object : NULL; -} - -static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0); -} - -static void put_pd_read(struct ib_pd *pd) -{ - put_uobj_read(pd->uobject); -} - -static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested) -{ - return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested); -} - -static void put_cq_read(struct ib_cq *cq) -{ - put_uobj_read(cq->uobject); -} - -static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0); -} - -static void put_ah_read(struct ib_ah *ah) -{ - put_uobj_read(ah->uobject); -} - -static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); -} - -static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0); -} - -static void put_wq_read(struct ib_wq *wq) -{ - put_uobj_read(wq->uobject); -} - -static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, - struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0); -} - -static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table) -{ - put_uobj_read(ind_table->uobject); -} - -static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) -{ - struct ib_uobject *uobj; - - uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context); - return uobj ? uobj->object : NULL; -} - -static void put_qp_read(struct ib_qp *qp) -{ - put_uobj_read(qp->uobject); -} - -static void put_qp_write(struct ib_qp *qp) -{ - put_uobj_write(qp->uobject); -} - -static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); -} - -static void put_srq_read(struct ib_srq *srq) -{ - put_uobj_read(srq->uobject); -} - -static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, - struct ib_uobject **uobj) -{ - *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0); - return *uobj ? 
(*uobj)->object : NULL; -} - -static void put_xrcd_read(struct ib_uobject *uobj) -{ - put_uobj_read(uobj); -} - -ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_get_context cmd; - struct ib_uverbs_get_context_resp resp; - struct ib_udata udata; - struct ib_ucontext *ucontext; - struct file *filp; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - mutex_lock(&file->mutex); - - if (file->ucontext) { - ret = -EINVAL; - goto err; - } - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - - ucontext = ib_dev->alloc_ucontext(ib_dev, &udata); - if (IS_ERR(ucontext)) { - ret = PTR_ERR(ucontext); - goto err; - } - - ucontext->device = ib_dev; - INIT_LIST_HEAD(&ucontext->pd_list); - INIT_LIST_HEAD(&ucontext->mr_list); - INIT_LIST_HEAD(&ucontext->mw_list); - INIT_LIST_HEAD(&ucontext->cq_list); - INIT_LIST_HEAD(&ucontext->qp_list); - INIT_LIST_HEAD(&ucontext->srq_list); - INIT_LIST_HEAD(&ucontext->ah_list); - INIT_LIST_HEAD(&ucontext->wq_list); - INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list); - INIT_LIST_HEAD(&ucontext->xrcd_list); - INIT_LIST_HEAD(&ucontext->rule_list); - rcu_read_lock(); - ucontext->tgid = get_pid(task_pid_group_leader(current)); - rcu_read_unlock(); - ucontext->closing = 0; - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - ucontext->umem_tree = RB_ROOT; - init_rwsem(&ucontext->umem_rwsem); - ucontext->odp_mrs_count = 0; - INIT_LIST_HEAD(&ucontext->no_private_counters); - - if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) - ucontext->invalidate_range = NULL; - -#endif - - resp.num_comp_vectors = file->device->num_comp_vectors; - - ret = get_unused_fd_flags(O_CLOEXEC); - if (ret < 0) - goto err_free; - resp.async_fd = ret; - - filp = ib_uverbs_alloc_event_file(file, ib_dev, 1); - if (IS_ERR(filp)) { - ret = PTR_ERR(filp); - goto err_fd; - } - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_file; - } - - file->ucontext = ucontext; - - fd_install(resp.async_fd, filp); - - mutex_unlock(&file->mutex); - - return in_len; - -err_file: - ib_uverbs_free_async_event_file(file); - fput(filp); - -err_fd: - put_unused_fd(resp.async_fd); - -err_free: - put_pid(ucontext->tgid); - ib_dev->dealloc_ucontext(ucontext); - -err: - mutex_unlock(&file->mutex); - return ret; -} - -static void copy_query_dev_fields(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_uverbs_query_device_resp *resp, - struct ib_device_attr *attr) -{ - resp->fw_ver = attr->fw_ver; - resp->node_guid = ib_dev->node_guid; - resp->sys_image_guid = attr->sys_image_guid; - resp->max_mr_size = attr->max_mr_size; - resp->page_size_cap = attr->page_size_cap; - resp->vendor_id = attr->vendor_id; - resp->vendor_part_id = attr->vendor_part_id; - resp->hw_ver = attr->hw_ver; - resp->max_qp = attr->max_qp; - resp->max_qp_wr = attr->max_qp_wr; - resp->device_cap_flags = (u32)(attr->device_cap_flags); - resp->max_sge = attr->max_sge; - resp->max_sge_rd = attr->max_sge_rd; - resp->max_cq = attr->max_cq; - resp->max_cqe = attr->max_cqe; - resp->max_mr = attr->max_mr; - resp->max_pd = attr->max_pd; - resp->max_qp_rd_atom = attr->max_qp_rd_atom; - resp->max_ee_rd_atom = attr->max_ee_rd_atom; - resp->max_res_rd_atom = attr->max_res_rd_atom; - resp->max_qp_init_rd_atom = 
attr->max_qp_init_rd_atom; - resp->max_ee_init_rd_atom = attr->max_ee_init_rd_atom; - resp->atomic_cap = attr->atomic_cap; - resp->max_ee = attr->max_ee; - resp->max_rdd = attr->max_rdd; - resp->max_mw = attr->max_mw; - resp->max_raw_ipv6_qp = attr->max_raw_ipv6_qp; - resp->max_raw_ethy_qp = attr->max_raw_ethy_qp; - resp->max_mcast_grp = attr->max_mcast_grp; - resp->max_mcast_qp_attach = attr->max_mcast_qp_attach; - resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach; - resp->max_ah = attr->max_ah; - resp->max_fmr = attr->max_fmr; - resp->max_map_per_fmr = attr->max_map_per_fmr; - resp->max_srq = attr->max_srq; - resp->max_srq_wr = attr->max_srq_wr; - resp->max_srq_sge = attr->max_srq_sge; - resp->max_pkeys = attr->max_pkeys; - resp->local_ca_ack_delay = attr->local_ca_ack_delay; - resp->phys_port_cnt = ib_dev->phys_port_cnt; -} - -ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_device cmd; - struct ib_uverbs_query_device_resp resp; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs); - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - -ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_port cmd; - struct ib_uverbs_query_port_resp resp; - struct ib_port_attr attr; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ret = ib_query_port(ib_dev, cmd.port_num, &attr); - if (ret) - return ret; - - memset(&resp, 0, sizeof resp); - - resp.state = attr.state; - resp.max_mtu = attr.max_mtu; - resp.active_mtu = attr.active_mtu; - resp.gid_tbl_len = attr.gid_tbl_len; - resp.port_cap_flags = attr.port_cap_flags; - resp.max_msg_sz = attr.max_msg_sz; - resp.bad_pkey_cntr = attr.bad_pkey_cntr; - resp.qkey_viol_cntr = attr.qkey_viol_cntr; - resp.pkey_tbl_len = attr.pkey_tbl_len; - resp.lid = attr.lid; - resp.sm_lid = attr.sm_lid; - resp.lmc = attr.lmc; - resp.max_vl_num = attr.max_vl_num; - resp.sm_sl = attr.sm_sl; - resp.subnet_timeout = attr.subnet_timeout; - resp.init_type_reply = attr.init_type_reply; - resp.active_width = attr.active_width; - resp.active_speed = attr.active_speed; - resp.phys_state = attr.phys_state; - resp.link_layer = rdma_port_get_link_layer(ib_dev, - cmd.port_num); - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - -ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_alloc_pd cmd; - struct ib_uverbs_alloc_pd_resp resp; - struct ib_udata udata; - struct ib_uobject *uobj; - struct ib_pd *pd; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, 0, file->ucontext, &pd_lock_class); - down_write(&uobj->mutex); - - pd = ib_dev->alloc_pd(ib_dev, 
file->ucontext, &udata); - if (IS_ERR(pd)) { - ret = PTR_ERR(pd); - goto err; - } - - pd->device = ib_dev; - pd->uobject = uobj; - pd->__internal_mr = NULL; - atomic_set(&pd->usecnt, 0); - - uobj->object = pd; - ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj); - if (ret) - goto err_idr; - - memset(&resp, 0, sizeof resp); - resp.pd_handle = uobj->id; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; - } - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->pd_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); - - return in_len; - -err_copy: - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); - -err_idr: - ib_dealloc_pd(pd); - -err: - put_uobj_write(uobj); - return ret; -} - -ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_dealloc_pd cmd; - struct ib_uobject *uobj; - struct ib_pd *pd; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); - if (!uobj) - return -EINVAL; - pd = uobj->object; - - if (atomic_read(&pd->usecnt)) { - ret = -EBUSY; - goto err_put; - } - - ret = pd->device->dealloc_pd(uobj->object); - WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); - if (ret) - goto err_put; - - uobj->live = 0; - put_uobj_write(uobj); - - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; - -err_put: - put_uobj_write(uobj); - return ret; -} - -struct xrcd_table_entry { - struct rb_node node; - struct ib_xrcd *xrcd; - struct inode *inode; -}; - -static int xrcd_table_insert(struct ib_uverbs_device *dev, - struct inode *inode, - struct ib_xrcd *xrcd) -{ - struct xrcd_table_entry *entry, *scan; - struct rb_node **p = &dev->xrcd_tree.rb_node; - struct rb_node *parent = NULL; - - entry = kmalloc(sizeof *entry, GFP_KERNEL); - if (!entry) - return -ENOMEM; - - entry->xrcd = xrcd; - entry->inode = inode; - - while (*p) { - parent = *p; - scan = rb_entry(parent, struct xrcd_table_entry, node); - - if (inode < scan->inode) { - p = &(*p)->rb_left; - } else if (inode > scan->inode) { - p = &(*p)->rb_right; - } else { - kfree(entry); - return -EEXIST; - } - } - - rb_link_node(&entry->node, parent, p); - rb_insert_color(&entry->node, &dev->xrcd_tree); - igrab(inode); - return 0; -} - -static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev, - struct inode *inode) -{ - struct xrcd_table_entry *entry; - struct rb_node *p = dev->xrcd_tree.rb_node; - - while (p) { - entry = rb_entry(p, struct xrcd_table_entry, node); - - if (inode < entry->inode) - p = p->rb_left; - else if (inode > entry->inode) - p = p->rb_right; - else - return entry; - } - - return NULL; -} - -static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode) -{ - struct xrcd_table_entry *entry; - - entry = xrcd_table_search(dev, inode); - if (!entry) - return NULL; - - return entry->xrcd; -} - -static void xrcd_table_delete(struct ib_uverbs_device *dev, - struct inode *inode) -{ - struct xrcd_table_entry *entry; - - entry = xrcd_table_search(dev, inode); - if (entry) { - iput(inode); - rb_erase(&entry->node, &dev->xrcd_tree); - kfree(entry); - } -} - -ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, 
int in_len, - int out_len) -{ - struct ib_uverbs_open_xrcd cmd; - struct ib_uverbs_open_xrcd_resp resp; - struct ib_udata udata; - struct ib_uxrcd_object *obj; - struct ib_xrcd *xrcd = NULL; - struct fd f = {NULL}; - struct inode *inode = NULL; - int ret = 0; - int new_xrcd = 0; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - - mutex_lock(&file->device->xrcd_tree_mutex); - - if (cmd.fd != -1) { - /* search for file descriptor */ - f = fdget(cmd.fd); - if (!f.file) { - ret = -EBADF; - goto err_tree_mutex_unlock; - } - - inode = f.file->f_dentry->d_inode; - xrcd = find_xrcd(file->device, inode); - if (!xrcd && !(cmd.oflags & O_CREAT)) { - /* no file descriptor. Need CREATE flag */ - ret = -EAGAIN; - goto err_tree_mutex_unlock; - } - - if (xrcd && cmd.oflags & O_EXCL) { - ret = -EINVAL; - goto err_tree_mutex_unlock; - } - } - - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) { - ret = -ENOMEM; - goto err_tree_mutex_unlock; - } - - init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class); - - down_write(&obj->uobject.mutex); - - if (!xrcd) { - xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata); - if (IS_ERR(xrcd)) { - ret = PTR_ERR(xrcd); - goto err; - } - - xrcd->inode = inode; - xrcd->device = ib_dev; - atomic_set(&xrcd->usecnt, 0); - mutex_init(&xrcd->tgt_qp_mutex); - INIT_LIST_HEAD(&xrcd->tgt_qp_list); - new_xrcd = 1; - } - - atomic_set(&obj->refcnt, 0); - obj->uobject.object = xrcd; - ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); - if (ret) - goto err_idr; - - memset(&resp, 0, sizeof resp); - resp.xrcd_handle = obj->uobject.id; - - if (inode) { - if (new_xrcd) { - /* create new inode/xrcd table entry */ - ret = xrcd_table_insert(file->device, inode, xrcd); - if (ret) - goto err_insert_xrcd; - } - atomic_inc(&xrcd->usecnt); - } - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; - } - - if (f.file) - fdput(f); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); - mutex_unlock(&file->mutex); - - obj->uobject.live = 1; - up_write(&obj->uobject.mutex); - - mutex_unlock(&file->device->xrcd_tree_mutex); - return in_len; - -err_copy: - if (inode) { - if (new_xrcd) - xrcd_table_delete(file->device, inode); - atomic_dec(&xrcd->usecnt); - } - -err_insert_xrcd: - idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); - -err_idr: - ib_dealloc_xrcd(xrcd); - -err: - put_uobj_write(&obj->uobject); - -err_tree_mutex_unlock: - if (f.file) - fdput(f); - - mutex_unlock(&file->device->xrcd_tree_mutex); - - return ret; -} - -ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_close_xrcd cmd; - struct ib_uobject *uobj; - struct ib_xrcd *xrcd = NULL; - struct inode *inode = NULL; - struct ib_uxrcd_object *obj; - int live; - int ret = 0; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - mutex_lock(&file->device->xrcd_tree_mutex); - uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext); - if (!uobj) { - ret = -EINVAL; - goto out; - } - - xrcd = uobj->object; - inode = xrcd->inode; - obj = container_of(uobj, struct ib_uxrcd_object, uobject); - if (atomic_read(&obj->refcnt)) { - put_uobj_write(uobj); - ret = -EBUSY; - goto out; - } 
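-
-	/*
-	 * XRCDs opened through a file descriptor are shared: the
-	 * per-device inode -> xrcd table maps the file's inode to a
-	 * single XRC domain, and xrcd->usecnt counts its openers.
-	 * Only the last close (or the close of an anonymous XRCD,
-	 * which has no backing inode) may deallocate the hardware
-	 * object; if deallocation fails, the reference dropped below
-	 * is restored before returning the error.
-	 */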
- - if (!inode || atomic_dec_and_test(&xrcd->usecnt)) { - ret = ib_dealloc_xrcd(uobj->object); - if (!ret) - uobj->live = 0; - } - - live = uobj->live; - if (inode && ret) - atomic_inc(&xrcd->usecnt); - - put_uobj_write(uobj); - - if (ret) - goto out; - - if (inode && !live) - xrcd_table_delete(file->device, inode); - - idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - ret = in_len; - -out: - mutex_unlock(&file->device->xrcd_tree_mutex); - return ret; -} - -void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, - struct ib_xrcd *xrcd) -{ - struct inode *inode; - - inode = xrcd->inode; - if (inode && !atomic_dec_and_test(&xrcd->usecnt)) - return; - - ib_dealloc_xrcd(xrcd); - - if (inode) - xrcd_table_delete(dev, inode); -} - -ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_reg_mr cmd; - struct ib_uverbs_reg_mr_resp resp; - struct ib_udata udata; - struct ib_uobject *uobj; - struct ib_pd *pd; - struct ib_mr *mr; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - - if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) - return -EINVAL; - - ret = ib_check_mr_access(cmd.access_flags); - if (ret) - return ret; - - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, 0, file->ucontext, &mr_lock_class); - down_write(&uobj->mutex); - - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - if (!pd) { - ret = -EINVAL; - goto err_free; - } - - if (cmd.access_flags & IB_ACCESS_ON_DEMAND) { - if (!(pd->device->attrs.device_cap_flags & - IB_DEVICE_ON_DEMAND_PAGING)) { - pr_debug("ODP support not available\n"); - ret = -EINVAL; - goto err_put; - } - } - - mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, - cmd.access_flags, &udata); - if (IS_ERR(mr)) { - ret = PTR_ERR(mr); - goto err_put; - } - - mr->device = pd->device; - mr->pd = pd; - mr->uobject = uobj; - atomic_inc(&pd->usecnt); - - uobj->object = mr; - ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); - if (ret) - goto err_unreg; - - memset(&resp, 0, sizeof resp); - resp.lkey = mr->lkey; - resp.rkey = mr->rkey; - resp.mr_handle = uobj->id; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; - } - - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->mr_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); - - return in_len; - -err_copy: - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - -err_unreg: - ib_dereg_mr(mr); - -err_put: - put_pd_read(pd); - -err_free: - put_uobj_write(uobj); - return ret; -} - -ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_rereg_mr cmd; - struct ib_uverbs_rereg_mr_resp resp; - struct ib_udata udata; - struct ib_pd *pd = NULL; - struct ib_mr *mr; - struct ib_pd *old_pd; - int ret; - struct ib_uobject *uobj; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long) cmd.response + sizeof(resp), 
- in_len - sizeof(cmd), out_len - sizeof(resp)); - - if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) - return -EINVAL; - - if ((cmd.flags & IB_MR_REREG_TRANS) && - (!cmd.start || !cmd.hca_va || 0 >= cmd.length || - (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) - return -EINVAL; - - uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, - file->ucontext); - - if (!uobj) - return -EINVAL; - - mr = uobj->object; - - if (cmd.flags & IB_MR_REREG_ACCESS) { - ret = ib_check_mr_access(cmd.access_flags); - if (ret) - goto put_uobjs; - } - - if (cmd.flags & IB_MR_REREG_PD) { - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - if (!pd) { - ret = -EINVAL; - goto put_uobjs; - } - } - - old_pd = mr->pd; - ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start, - cmd.length, cmd.hca_va, - cmd.access_flags, pd, &udata); - if (!ret) { - if (cmd.flags & IB_MR_REREG_PD) { - atomic_inc(&pd->usecnt); - mr->pd = pd; - atomic_dec(&old_pd->usecnt); - } - } else { - goto put_uobj_pd; - } - - memset(&resp, 0, sizeof(resp)); - resp.lkey = mr->lkey; - resp.rkey = mr->rkey; - - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) - ret = -EFAULT; - else - ret = in_len; - -put_uobj_pd: - if (cmd.flags & IB_MR_REREG_PD) - put_pd_read(pd); - -put_uobjs: - - put_uobj_write(mr->uobject); - - return ret; -} - -ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_dereg_mr cmd; - struct ib_mr *mr; - struct ib_uobject *uobj; - int ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext); - if (!uobj) - return -EINVAL; - - mr = uobj->object; - - ret = ib_dereg_mr(mr); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; - - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; -} - -ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_alloc_mw cmd; - struct ib_uverbs_alloc_mw_resp resp; - struct ib_uobject *uobj; - struct ib_pd *pd; - struct ib_mw *mw; - struct ib_udata udata; - int ret; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, 0, file->ucontext, &mw_lock_class); - down_write(&uobj->mutex); - - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - if (!pd) { - ret = -EINVAL; - goto err_free; - } - - INIT_UDATA(&udata, buf + sizeof(cmd), - (unsigned long)cmd.response + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); - - mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata); - if (IS_ERR(mw)) { - ret = PTR_ERR(mw); - goto err_put; - } - - mw->device = pd->device; - mw->pd = pd; - mw->uobject = uobj; - atomic_inc(&pd->usecnt); - - uobj->object = mw; - ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj); - if (ret) - goto err_unalloc; - - memset(&resp, 0, sizeof(resp)); - resp.rkey = mw->rkey; - resp.mw_handle = uobj->id; - - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) { - ret = -EFAULT; - goto err_copy; - } - - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, 
&file->ucontext->mw_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); - - return in_len; - -err_copy: - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); - -err_unalloc: - uverbs_dealloc_mw(mw); - -err_put: - put_pd_read(pd); - -err_free: - put_uobj_write(uobj); - return ret; -} - -ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_dealloc_mw cmd; - struct ib_mw *mw; - struct ib_uobject *uobj; - int ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext); - if (!uobj) - return -EINVAL; - - mw = uobj->object; - - ret = uverbs_dealloc_mw(mw); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; - - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; -} - -ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_create_comp_channel cmd; - struct ib_uverbs_create_comp_channel_resp resp; - struct file *filp; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ret = get_unused_fd_flags(O_CLOEXEC); - if (ret < 0) - return ret; - resp.fd = ret; - - filp = ib_uverbs_alloc_event_file(file, ib_dev, 0); - if (IS_ERR(filp)) { - put_unused_fd(resp.fd); - return PTR_ERR(filp); - } - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - put_unused_fd(resp.fd); - fput(filp); - return -EFAULT; - } - - fd_install(resp.fd, filp); - return in_len; -} - -static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw, - struct ib_uverbs_ex_create_cq *cmd, - size_t cmd_sz, - int (*cb)(struct ib_uverbs_file *file, - struct ib_ucq_object *obj, - struct ib_uverbs_ex_create_cq_resp *resp, - struct ib_udata *udata, - void *context), - void *context) -{ - struct ib_ucq_object *obj; - struct ib_uverbs_event_file *ev_file = NULL; - struct ib_cq *cq; - int ret; - struct ib_uverbs_ex_create_cq_resp resp; - struct ib_cq_init_attr attr = {}; - - if (cmd->comp_vector >= file->device->num_comp_vectors) - return ERR_PTR(-EINVAL); - - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return ERR_PTR(-ENOMEM); - - init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class); - down_write(&obj->uobject.mutex); - - if (cmd->comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel); - if (!ev_file) { - ret = -EINVAL; - goto err; - } - } - - obj->uverbs_file = file; - obj->comp_events_reported = 0; - obj->async_events_reported = 0; - INIT_LIST_HEAD(&obj->comp_list); - INIT_LIST_HEAD(&obj->async_list); - - attr.cqe = cmd->cqe; - attr.comp_vector = cmd->comp_vector; - - if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) - attr.flags = cmd->flags; - - cq = ib_dev->create_cq(ib_dev, &attr, - file->ucontext, uhw); - if (IS_ERR(cq)) { - ret = PTR_ERR(cq); - goto err_file; - } - - cq->device = ib_dev; - cq->uobject = &obj->uobject; - cq->comp_handler = ib_uverbs_comp_handler; - cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = ev_file; - atomic_set(&cq->usecnt, 0); - - obj->uobject.object = cq; 
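-
-	/*
-	 * Publish the CQ in the idr so later commands can resolve the
-	 * handle returned in the response.  The response itself is
-	 * written back by the cb() callback supplied by the caller:
-	 * the legacy create path copies only the base layout, while
-	 * the extended path copies resp.response_length bytes, which
-	 * lets newer kernels append response fields without breaking
-	 * older userspace.
-	 *
-	 * Simplified sketch of how userspace reaches this point over
-	 * the legacy write() ABI (error handling omitted; uverbs_fd
-	 * is an open /dev/infiniband/uverbsN descriptor):
-	 *
-	 *	struct ib_uverbs_create_cq_resp resp;
-	 *	struct {
-	 *		struct ib_uverbs_cmd_hdr   hdr;
-	 *		struct ib_uverbs_create_cq cmd;
-	 *	} req = {
-	 *		.hdr.command      = IB_USER_VERBS_CMD_CREATE_CQ,
-	 *		.hdr.in_words     = sizeof(req) / 4,
-	 *		.hdr.out_words    = sizeof(resp) / 4,
-	 *		.cmd.response     = (uintptr_t)&resp,
-	 *		.cmd.cqe          = 256,
-	 *		.cmd.comp_channel = -1,
-	 *	};
-	 *	write(uverbs_fd, &req, sizeof(req));
-	 */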
- ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject); - if (ret) - goto err_free; - - memset(&resp, 0, sizeof resp); - resp.base.cq_handle = obj->uobject.id; - resp.base.cqe = cq->cqe; - - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); - - ret = cb(file, obj, &resp, ucore, context); - if (ret) - goto err_cb; - - mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); - mutex_unlock(&file->mutex); - - obj->uobject.live = 1; - - up_write(&obj->uobject.mutex); - - return obj; - -err_cb: - idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); - -err_free: - ib_destroy_cq(cq); - -err_file: - if (ev_file) - ib_uverbs_release_ucq(file, ev_file, obj); - -err: - put_uobj_write(&obj->uobject); - - return ERR_PTR(ret); -} - -static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file, - struct ib_ucq_object *obj, - struct ib_uverbs_ex_create_cq_resp *resp, - struct ib_udata *ucore, void *context) -{ - if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) - return -EFAULT; - - return 0; -} - -ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_create_cq cmd; - struct ib_uverbs_ex_create_cq cmd_ex; - struct ib_uverbs_create_cq_resp resp; - struct ib_udata ucore; - struct ib_udata uhw; - struct ib_ucq_object *obj; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp)); - - INIT_UDATA(&uhw, buf + sizeof(cmd), - (unsigned long)cmd.response + sizeof(resp), - in_len - sizeof(cmd), out_len - sizeof(resp)); - - memset(&cmd_ex, 0, sizeof(cmd_ex)); - cmd_ex.user_handle = cmd.user_handle; - cmd_ex.cqe = cmd.cqe; - cmd_ex.comp_vector = cmd.comp_vector; - cmd_ex.comp_channel = cmd.comp_channel; - - obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex, - offsetof(typeof(cmd_ex), comp_channel) + - sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, - NULL); - - if (IS_ERR(obj)) - return PTR_ERR(obj); - - return in_len; -} - -static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file, - struct ib_ucq_object *obj, - struct ib_uverbs_ex_create_cq_resp *resp, - struct ib_udata *ucore, void *context) -{ - if (ib_copy_to_udata(ucore, resp, resp->response_length)) - return -EFAULT; - - return 0; -} - -int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) -{ - struct ib_uverbs_ex_create_cq_resp resp; - struct ib_uverbs_ex_create_cq cmd; - struct ib_ucq_object *obj; - int err; - - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; - - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); - if (err) - return err; - - if (cmd.comp_mask) - return -EINVAL; - - if (cmd.reserved) - return -EINVAL; - - if (ucore->outlen < (offsetof(typeof(resp), response_length) + - sizeof(resp.response_length))) - return -ENOSPC; - - obj = create_cq(file, ib_dev, ucore, uhw, &cmd, - min(ucore->inlen, sizeof(cmd)), - ib_uverbs_ex_create_cq_cb, NULL); - - if (IS_ERR(obj)) - return PTR_ERR(obj); - - return 0; -} - -ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_resize_cq cmd; - struct ib_uverbs_resize_cq_resp resp; - struct ib_udata udata; - struct ib_cq *cq; - int ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof 
cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); - if (!cq) - return -EINVAL; - - ret = cq->device->resize_cq(cq, cmd.cqe, &udata); - if (ret) - goto out; - - resp.cqe = cq->cqe; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp.cqe)) - ret = -EFAULT; - -out: - put_cq_read(cq); - - return ret ? ret : in_len; -} - -static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) -{ - struct ib_uverbs_wc tmp; - - tmp.wr_id = wc->wr_id; - tmp.status = wc->status; - tmp.opcode = wc->opcode; - tmp.vendor_err = wc->vendor_err; - tmp.byte_len = wc->byte_len; - tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data; - tmp.qp_num = wc->qp->qp_num; - tmp.src_qp = wc->src_qp; - tmp.wc_flags = wc->wc_flags; - tmp.pkey_index = wc->pkey_index; - tmp.slid = wc->slid; - tmp.sl = wc->sl; - tmp.dlid_path_bits = wc->dlid_path_bits; - tmp.port_num = wc->port_num; - tmp.reserved = 0; - - if (copy_to_user(dest, &tmp, sizeof tmp)) - return -EFAULT; - - return 0; -} - -ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_poll_cq cmd; - struct ib_uverbs_poll_cq_resp resp; - u8 __user *header_ptr; - u8 __user *data_ptr; - struct ib_cq *cq; - struct ib_wc wc; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); - if (!cq) - return -EINVAL; - - /* we copy a struct ib_uverbs_poll_cq_resp to user space */ - header_ptr = (void __user *)(unsigned long) cmd.response; - data_ptr = header_ptr + sizeof resp; - - memset(&resp, 0, sizeof resp); - while (resp.count < cmd.ne) { - ret = ib_poll_cq(cq, 1, &wc); - if (ret < 0) - goto out_put; - if (!ret) - break; - - ret = copy_wc_to_user(data_ptr, &wc); - if (ret) - goto out_put; - - data_ptr += sizeof(struct ib_uverbs_wc); - ++resp.count; - } - - if (copy_to_user(header_ptr, &resp, sizeof resp)) { - ret = -EFAULT; - goto out_put; - } - - ret = in_len; - -out_put: - put_cq_read(cq); - return ret; -} - -ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_req_notify_cq cmd; - struct ib_cq *cq; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); - if (!cq) - return -EINVAL; - - ib_req_notify_cq(cq, cmd.solicited_only ? 
- IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - - put_cq_read(cq); - - return in_len; -} - -ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_destroy_cq cmd; - struct ib_uverbs_destroy_cq_resp resp; - struct ib_uobject *uobj; - struct ib_cq *cq; - struct ib_ucq_object *obj; - struct ib_uverbs_event_file *ev_file; - int ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); - if (!uobj) - return -EINVAL; - cq = uobj->object; - ev_file = cq->cq_context; - obj = container_of(cq->uobject, struct ib_ucq_object, uobject); - - ret = ib_destroy_cq(cq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; - - idr_remove_uobj(&ib_uverbs_cq_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_ucq(file, ev_file, obj); - - memset(&resp, 0, sizeof resp); - resp.comp_events_reported = obj->comp_events_reported; - resp.async_events_reported = obj->async_events_reported; - - put_uobj(uobj); - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - -static int create_qp(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw, - struct ib_uverbs_ex_create_qp *cmd, - size_t cmd_sz, - int (*cb)(struct ib_uverbs_file *file, - struct ib_uverbs_ex_create_qp_resp *resp, - struct ib_udata *udata), - void *context) -{ - struct ib_uqp_object *obj; - struct ib_device *device; - struct ib_pd *pd = NULL; - struct ib_xrcd *xrcd = NULL; - struct ib_uobject *uninitialized_var(xrcd_uobj); - struct ib_cq *scq = NULL, *rcq = NULL; - struct ib_srq *srq = NULL; - struct ib_qp *qp; - char *buf; - struct ib_qp_init_attr attr = {}; - struct ib_uverbs_ex_create_qp_resp resp; - int ret; - struct ib_rwq_ind_table *ind_tbl = NULL; - bool has_sq = true; - - if (cmd->qp_type == IB_QPT_RAW_PACKET && priv_check(curthread, PRIV_NET_RAW) != 0) - return -EPERM; - - obj = kzalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; - - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, - &qp_lock_class); - down_write(&obj->uevent.uobject.mutex); - if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + - sizeof(cmd->rwq_ind_tbl_handle) && - (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { - ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle, - file->ucontext); - if (!ind_tbl) { - ret = -EINVAL; - goto err_put; - } - - attr.rwq_ind_tbl = ind_tbl; - } - - if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) + - sizeof(cmd->reserved1)) && cmd->reserved1) { - ret = -EOPNOTSUPP; - goto err_put; - } - - if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) { - ret = -EINVAL; - goto err_put; - } - - if (ind_tbl && !cmd->max_send_wr) - has_sq = false; - - if (cmd->qp_type == IB_QPT_XRC_TGT) { - xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, - &xrcd_uobj); - if (!xrcd) { - ret = -EINVAL; - goto err_put; - } - device = xrcd->device; - } else { - if (cmd->qp_type == IB_QPT_XRC_INI) { - cmd->max_recv_wr = 0; - cmd->max_recv_sge = 0; - } else { - if (cmd->is_srq) { - srq = idr_read_srq(cmd->srq_handle, - file->ucontext); - if (!srq || srq->srq_type != IB_SRQT_BASIC) { - ret = -EINVAL; - goto err_put; - } - } - - if (!ind_tbl) { - if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = 
idr_read_cq(cmd->recv_cq_handle, - file->ucontext, 0); - if (!rcq) { - ret = -EINVAL; - goto err_put; - } - } - } - } - - if (has_sq) - scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); - if (!ind_tbl) - rcq = rcq ?: scq; - pd = idr_read_pd(cmd->pd_handle, file->ucontext); - if (!pd || (!scq && has_sq)) { - ret = -EINVAL; - goto err_put; - } - - device = pd->device; - } - - attr.event_handler = ib_uverbs_qp_event_handler; - attr.qp_context = file; - attr.send_cq = scq; - attr.recv_cq = rcq; - attr.srq = srq; - attr.xrcd = xrcd; - attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR : - IB_SIGNAL_REQ_WR; - attr.qp_type = cmd->qp_type; - attr.create_flags = 0; - - attr.cap.max_send_wr = cmd->max_send_wr; - attr.cap.max_recv_wr = cmd->max_recv_wr; - attr.cap.max_send_sge = cmd->max_send_sge; - attr.cap.max_recv_sge = cmd->max_recv_sge; - attr.cap.max_inline_data = cmd->max_inline_data; - - obj->uevent.events_reported = 0; - INIT_LIST_HEAD(&obj->uevent.event_list); - INIT_LIST_HEAD(&obj->mcast_list); - - if (cmd_sz >= offsetof(typeof(*cmd), create_flags) + - sizeof(cmd->create_flags)) - attr.create_flags = cmd->create_flags; - - if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | - IB_QP_CREATE_CROSS_CHANNEL | - IB_QP_CREATE_MANAGED_SEND | - IB_QP_CREATE_MANAGED_RECV | - IB_QP_CREATE_SCATTER_FCS)) { - ret = -EINVAL; - goto err_put; - } - - buf = (char *)cmd + sizeof(*cmd); - if (cmd_sz > sizeof(*cmd)) - if (!(buf[0] == 0 && !memcmp(buf, buf + 1, - cmd_sz - sizeof(*cmd) - 1))) { - ret = -EINVAL; - goto err_put; - } - - if (cmd->qp_type == IB_QPT_XRC_TGT) - qp = ib_create_qp(pd, &attr); - else - qp = device->create_qp(pd, &attr, uhw); - - if (IS_ERR(qp)) { - ret = PTR_ERR(qp); - goto err_put; - } - - if (cmd->qp_type != IB_QPT_XRC_TGT) { - qp->real_qp = qp; - qp->device = device; - qp->pd = pd; - qp->send_cq = attr.send_cq; - qp->recv_cq = attr.recv_cq; - qp->srq = attr.srq; - qp->rwq_ind_tbl = ind_tbl; - qp->event_handler = attr.event_handler; - qp->qp_context = attr.qp_context; - qp->qp_type = attr.qp_type; - atomic_set(&qp->usecnt, 0); - atomic_inc(&pd->usecnt); - if (attr.send_cq) - atomic_inc(&attr.send_cq->usecnt); - if (attr.recv_cq) - atomic_inc(&attr.recv_cq->usecnt); - if (attr.srq) - atomic_inc(&attr.srq->usecnt); - if (ind_tbl) - atomic_inc(&ind_tbl->usecnt); - } - qp->uobject = &obj->uevent.uobject; - - obj->uevent.uobject.object = qp; - ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - if (ret) - goto err_destroy; - - memset(&resp, 0, sizeof resp); - resp.base.qpn = qp->qp_num; - resp.base.qp_handle = obj->uevent.uobject.id; - resp.base.max_recv_sge = attr.cap.max_recv_sge; - resp.base.max_send_sge = attr.cap.max_send_sge; - resp.base.max_recv_wr = attr.cap.max_recv_wr; - resp.base.max_send_wr = attr.cap.max_send_wr; - resp.base.max_inline_data = attr.cap.max_inline_data; - - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); - - ret = cb(file, &resp, ucore); - if (ret) - goto err_cb; - - if (xrcd) { - obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, - uobject); - atomic_inc(&obj->uxrcd->refcnt); - put_xrcd_read(xrcd_uobj); - } - - if (pd) - put_pd_read(pd); - if (scq) - put_cq_read(scq); - if (rcq && rcq != scq) - put_cq_read(rcq); - if (srq) - put_srq_read(srq); - if (ind_tbl) - put_rwq_indirection_table_read(ind_tbl); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; - 
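-	/*
-	 * The uobject was created write-locked by init_uobj() and was
-	 * only marked live above, once the QP was fully constructed
-	 * and all borrowed objects (PD, CQs, SRQ, XRCD, indirection
-	 * table) had their read references dropped.  Concurrent idr
-	 * lookups therefore never observe a half-initialized QP.
-	 */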
- up_write(&obj->uevent.uobject.mutex); - - return 0; -err_cb: - idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - -err_destroy: - ib_destroy_qp(qp); - -err_put: - if (xrcd) - put_xrcd_read(xrcd_uobj); - if (pd) - put_pd_read(pd); - if (scq) - put_cq_read(scq); - if (rcq && rcq != scq) - put_cq_read(rcq); - if (srq) - put_srq_read(srq); - if (ind_tbl) - put_rwq_indirection_table_read(ind_tbl); - - put_uobj_write(&obj->uevent.uobject); - return ret; -} - -static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file, - struct ib_uverbs_ex_create_qp_resp *resp, - struct ib_udata *ucore) -{ - if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) - return -EFAULT; - - return 0; -} - -ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_create_qp cmd; - struct ib_uverbs_ex_create_qp cmd_ex; - struct ib_udata ucore; - struct ib_udata uhw; - ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp); - int err; - - if (out_len < resp_size) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), - resp_size); - INIT_UDATA(&uhw, buf + sizeof(cmd), - (unsigned long)cmd.response + resp_size, - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - resp_size); - - memset(&cmd_ex, 0, sizeof(cmd_ex)); - cmd_ex.user_handle = cmd.user_handle; - cmd_ex.pd_handle = cmd.pd_handle; - cmd_ex.send_cq_handle = cmd.send_cq_handle; - cmd_ex.recv_cq_handle = cmd.recv_cq_handle; - cmd_ex.srq_handle = cmd.srq_handle; - cmd_ex.max_send_wr = cmd.max_send_wr; - cmd_ex.max_recv_wr = cmd.max_recv_wr; - cmd_ex.max_send_sge = cmd.max_send_sge; - cmd_ex.max_recv_sge = cmd.max_recv_sge; - cmd_ex.max_inline_data = cmd.max_inline_data; - cmd_ex.sq_sig_all = cmd.sq_sig_all; - cmd_ex.qp_type = cmd.qp_type; - cmd_ex.is_srq = cmd.is_srq; - - err = create_qp(file, &ucore, &uhw, &cmd_ex, - offsetof(typeof(cmd_ex), is_srq) + - sizeof(cmd.is_srq), ib_uverbs_create_qp_cb, - NULL); - - if (err) - return err; - - return in_len; -} - -static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file, - struct ib_uverbs_ex_create_qp_resp *resp, - struct ib_udata *ucore) -{ - if (ib_copy_to_udata(ucore, resp, resp->response_length)) - return -EFAULT; - - return 0; -} - -int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) -{ - struct ib_uverbs_ex_create_qp_resp resp; - struct ib_uverbs_ex_create_qp cmd = {0}; - int err; - - if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) + - sizeof(cmd.comp_mask))) - return -EINVAL; - - err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); - if (err) - return err; - - if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK) - return -EINVAL; - - if (cmd.reserved) - return -EINVAL; - - if (ucore->outlen < (offsetof(typeof(resp), response_length) + - sizeof(resp.response_length))) - return -ENOSPC; - - err = create_qp(file, ucore, uhw, &cmd, - min(ucore->inlen, sizeof(cmd)), - ib_uverbs_ex_create_qp_cb, NULL); - - if (err) - return err; - - return 0; -} - -ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, int out_len) -{ - struct ib_uverbs_open_qp cmd; - struct ib_uverbs_create_qp_resp resp; - struct ib_udata udata; - struct ib_uqp_object *obj; - struct ib_xrcd *xrcd; - struct ib_uobject 
*uninitialized_var(xrcd_uobj); - struct ib_qp *qp; - struct ib_qp_open_attr attr; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; - - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); - down_write(&obj->uevent.uobject.mutex); - - xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); - if (!xrcd) { - ret = -EINVAL; - goto err_put; - } - - attr.event_handler = ib_uverbs_qp_event_handler; - attr.qp_context = file; - attr.qp_num = cmd.qpn; - attr.qp_type = cmd.qp_type; - - obj->uevent.events_reported = 0; - INIT_LIST_HEAD(&obj->uevent.event_list); - INIT_LIST_HEAD(&obj->mcast_list); - - qp = ib_open_qp(xrcd, &attr); - if (IS_ERR(qp)) { - ret = PTR_ERR(qp); - goto err_put; - } - - qp->uobject = &obj->uevent.uobject; - - obj->uevent.uobject.object = qp; - ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - if (ret) - goto err_destroy; - - memset(&resp, 0, sizeof resp); - resp.qpn = qp->qp_num; - resp.qp_handle = obj->uevent.uobject.id; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_remove; - } - - obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); - atomic_inc(&obj->uxrcd->refcnt); - put_xrcd_read(xrcd_uobj); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; - - up_write(&obj->uevent.uobject.mutex); - - return in_len; - -err_remove: - idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - -err_destroy: - ib_destroy_qp(qp); - -err_put: - put_xrcd_read(xrcd_uobj); - put_uobj_write(&obj->uevent.uobject); - return ret; -} - -ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_query_qp cmd; - struct ib_uverbs_query_qp_resp resp; - struct ib_qp *qp; - struct ib_qp_attr *attr; - struct ib_qp_init_attr *init_attr; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - attr = kmalloc(sizeof *attr, GFP_KERNEL); - init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); - if (!attr || !init_attr) { - ret = -ENOMEM; - goto out; - } - - qp = idr_read_qp(cmd.qp_handle, file->ucontext); - if (!qp) { - ret = -EINVAL; - goto out; - } - - ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); - - put_qp_read(qp); - - if (ret) - goto out; - - memset(&resp, 0, sizeof resp); - - resp.qp_state = attr->qp_state; - resp.cur_qp_state = attr->cur_qp_state; - resp.path_mtu = attr->path_mtu; - resp.path_mig_state = attr->path_mig_state; - resp.qkey = attr->qkey; - resp.rq_psn = attr->rq_psn; - resp.sq_psn = attr->sq_psn; - resp.dest_qp_num = attr->dest_qp_num; - resp.qp_access_flags = attr->qp_access_flags; - resp.pkey_index = attr->pkey_index; - resp.alt_pkey_index = attr->alt_pkey_index; - resp.sq_draining = attr->sq_draining; - resp.max_rd_atomic = attr->max_rd_atomic; - resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; - resp.min_rnr_timer = attr->min_rnr_timer; - resp.port_num = attr->port_num; - resp.timeout = attr->timeout; - resp.retry_cnt = attr->retry_cnt; - resp.rnr_retry = attr->rnr_retry; - resp.alt_port_num = attr->alt_port_num; - 
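-	/*
-	 * The dest/alt_dest fields that follow flatten attr->ah_attr
-	 * and attr->alt_ah_attr into the response.  is_global is
-	 * derived from the IB_AH_GRH flag, mirroring how
-	 * ib_uverbs_modify_qp reconstructs ah_flags from
-	 * cmd.dest.is_global on the way in.
-	 */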
resp.alt_timeout = attr->alt_timeout; - - memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); - resp.dest.flow_label = attr->ah_attr.grh.flow_label; - resp.dest.sgid_index = attr->ah_attr.grh.sgid_index; - resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; - resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; - resp.dest.dlid = attr->ah_attr.dlid; - resp.dest.sl = attr->ah_attr.sl; - resp.dest.src_path_bits = attr->ah_attr.src_path_bits; - resp.dest.static_rate = attr->ah_attr.static_rate; - resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH); - resp.dest.port_num = attr->ah_attr.port_num; - - memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); - resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; - resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; - resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; - resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; - resp.alt_dest.dlid = attr->alt_ah_attr.dlid; - resp.alt_dest.sl = attr->alt_ah_attr.sl; - resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; - resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; - resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); - resp.alt_dest.port_num = attr->alt_ah_attr.port_num; - - resp.max_send_wr = init_attr->cap.max_send_wr; - resp.max_recv_wr = init_attr->cap.max_recv_wr; - resp.max_send_sge = init_attr->cap.max_send_sge; - resp.max_recv_sge = init_attr->cap.max_recv_sge; - resp.max_inline_data = init_attr->cap.max_inline_data; - resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; - -out: - kfree(attr); - kfree(init_attr); - - return ret ? ret : in_len; -} - -/* Remove ignored fields set in the attribute mask */ -static int modify_qp_mask(enum ib_qp_type qp_type, int mask) -{ - switch (qp_type) { - case IB_QPT_XRC_INI: - return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER); - case IB_QPT_XRC_TGT: - return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY); - default: - return mask; - } -} - -ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_modify_qp cmd; - struct ib_udata udata; - struct ib_qp *qp; - struct ib_qp_attr *attr; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, - out_len); - - attr = kmalloc(sizeof *attr, GFP_KERNEL); - if (!attr) - return -ENOMEM; - - qp = idr_read_qp(cmd.qp_handle, file->ucontext); - if (!qp) { - ret = -EINVAL; - goto out; - } - - attr->qp_state = cmd.qp_state; - attr->cur_qp_state = cmd.cur_qp_state; - attr->path_mtu = cmd.path_mtu; - attr->path_mig_state = cmd.path_mig_state; - attr->qkey = cmd.qkey; - attr->rq_psn = cmd.rq_psn; - attr->sq_psn = cmd.sq_psn; - attr->dest_qp_num = cmd.dest_qp_num; - attr->qp_access_flags = cmd.qp_access_flags; - attr->pkey_index = cmd.pkey_index; - attr->alt_pkey_index = cmd.alt_pkey_index; - attr->en_sqd_async_notify = cmd.en_sqd_async_notify; - attr->max_rd_atomic = cmd.max_rd_atomic; - attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; - attr->min_rnr_timer = cmd.min_rnr_timer; - attr->port_num = cmd.port_num; - attr->timeout = cmd.timeout; - attr->retry_cnt = cmd.retry_cnt; - attr->rnr_retry = cmd.rnr_retry; - attr->alt_port_num = cmd.alt_port_num; - attr->alt_timeout = 
cmd.alt_timeout; - - memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); - attr->ah_attr.grh.flow_label = cmd.dest.flow_label; - attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; - attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; - attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; - attr->ah_attr.dlid = cmd.dest.dlid; - attr->ah_attr.sl = cmd.dest.sl; - attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; - attr->ah_attr.static_rate = cmd.dest.static_rate; - attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; - attr->ah_attr.port_num = cmd.dest.port_num; - - memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); - attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; - attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; - attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; - attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; - attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; - attr->alt_ah_attr.sl = cmd.alt_dest.sl; - attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; - attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; - attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; - attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; - - if (qp->real_qp == qp) { - ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask); - if (ret) - goto release_qp; - ret = qp->device->modify_qp(qp, attr, - modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); - } else { - ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); - } - - if (ret) - goto release_qp; - - ret = in_len; - -release_qp: - put_qp_read(qp); - -out: - kfree(attr); - - return ret; -} - -ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_destroy_qp cmd; - struct ib_uverbs_destroy_qp_resp resp; - struct ib_uobject *uobj; - struct ib_qp *qp; - struct ib_uqp_object *obj; - int ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext); - if (!uobj) - return -EINVAL; - qp = uobj->object; - obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); - - if (!list_empty(&obj->mcast_list)) { - put_uobj_write(uobj); - return -EBUSY; - } - - ret = ib_destroy_qp(qp); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; - - if (obj->uxrcd) - atomic_dec(&obj->uxrcd->refcnt); - - idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_uevent(file, &obj->uevent); - - resp.events_reported = obj->uevent.events_reported; - - put_uobj(uobj); - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - -static void *alloc_wr(size_t wr_size, __u32 num_sge) -{ - return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) + - num_sge * sizeof (struct ib_sge), GFP_KERNEL); -}; - -ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_post_send cmd; - struct ib_uverbs_post_send_resp resp; - struct ib_uverbs_send_wr *user_wr; - struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; - struct ib_qp *qp; - int i, sg_ind; - int is_ud; - ssize_t ret = -EINVAL; - size_t next_size; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return 
-EFAULT; - - if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + - cmd.sge_count * sizeof (struct ib_uverbs_sge)) - return -EINVAL; - - if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) - return -EINVAL; - - user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); - if (!user_wr) - return -ENOMEM; - - qp = idr_read_qp(cmd.qp_handle, file->ucontext); - if (!qp) - goto out; - - is_ud = qp->qp_type == IB_QPT_UD; - sg_ind = 0; - last = NULL; - for (i = 0; i < cmd.wr_count; ++i) { - if (copy_from_user(user_wr, - buf + sizeof cmd + i * cmd.wqe_size, - cmd.wqe_size)) { - ret = -EFAULT; - goto out_put; - } - - if (user_wr->num_sge + sg_ind > cmd.sge_count) { - ret = -EINVAL; - goto out_put; - } - - if (is_ud) { - struct ib_ud_wr *ud; - - if (user_wr->opcode != IB_WR_SEND && - user_wr->opcode != IB_WR_SEND_WITH_IMM) { - ret = -EINVAL; - goto out_put; - } - - next_size = sizeof(*ud); - ud = alloc_wr(next_size, user_wr->num_sge); - if (!ud) { - ret = -ENOMEM; - goto out_put; - } - - ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); - if (!ud->ah) { - kfree(ud); - ret = -EINVAL; - goto out_put; - } - ud->remote_qpn = user_wr->wr.ud.remote_qpn; - ud->remote_qkey = user_wr->wr.ud.remote_qkey; - - next = &ud->wr; - } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || - user_wr->opcode == IB_WR_RDMA_WRITE || - user_wr->opcode == IB_WR_RDMA_READ) { - struct ib_rdma_wr *rdma; - - next_size = sizeof(*rdma); - rdma = alloc_wr(next_size, user_wr->num_sge); - if (!rdma) { - ret = -ENOMEM; - goto out_put; - } - - rdma->remote_addr = user_wr->wr.rdma.remote_addr; - rdma->rkey = user_wr->wr.rdma.rkey; - - next = &rdma->wr; - } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || - user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { - struct ib_atomic_wr *atomic; - - next_size = sizeof(*atomic); - atomic = alloc_wr(next_size, user_wr->num_sge); - if (!atomic) { - ret = -ENOMEM; - goto out_put; - } - - atomic->remote_addr = user_wr->wr.atomic.remote_addr; - atomic->compare_add = user_wr->wr.atomic.compare_add; - atomic->swap = user_wr->wr.atomic.swap; - atomic->rkey = user_wr->wr.atomic.rkey; - - next = &atomic->wr; - } else if (user_wr->opcode == IB_WR_SEND || - user_wr->opcode == IB_WR_SEND_WITH_IMM || - user_wr->opcode == IB_WR_SEND_WITH_INV) { - next_size = sizeof(*next); - next = alloc_wr(next_size, user_wr->num_sge); - if (!next) { - ret = -ENOMEM; - goto out_put; - } - } else { - ret = -EINVAL; - goto out_put; - } - - if (user_wr->opcode == IB_WR_SEND_WITH_IMM || - user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) { - next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey; - } - - if (!last) - wr = next; - else - last->next = next; - last = next; - - next->next = NULL; - next->wr_id = user_wr->wr_id; - next->num_sge = user_wr->num_sge; - next->opcode = user_wr->opcode; - next->send_flags = user_wr->send_flags; - - if (next->num_sge) { - next->sg_list = (void *)((char *)next + - ALIGN(next_size, sizeof(struct ib_sge))); - if (copy_from_user(next->sg_list, - (const char *)buf + sizeof cmd + - cmd.wr_count * cmd.wqe_size + - sg_ind * sizeof (struct ib_sge), - next->num_sge * sizeof (struct ib_sge))) { - ret = -EFAULT; - goto out_put; - } - sg_ind += next->num_sge; - } else - next->sg_list = NULL; - } - - resp.bad_wr = 0; - ret = qp->device->post_send(qp->real_qp, wr, &bad_wr); - if (ret) - for (next = wr; next; next = next->next) { - ++resp.bad_wr; - if (next == bad_wr) - break; - } - - if 
(copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; - -out_put: - put_qp_read(qp); - - while (wr) { - if (is_ud && ud_wr(wr)->ah) - put_ah_read(ud_wr(wr)->ah); - next = wr->next; - kfree(wr); - wr = next; - } - -out: - kfree(user_wr); - - return ret ? ret : in_len; -} - -static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, - int in_len, - u32 wr_count, - u32 sge_count, - u32 wqe_size) -{ - struct ib_uverbs_recv_wr *user_wr; - struct ib_recv_wr *wr = NULL, *last, *next; - int sg_ind; - int i; - int ret; - - if (in_len < wqe_size * wr_count + - sge_count * sizeof (struct ib_uverbs_sge)) - return ERR_PTR(-EINVAL); - - if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) - return ERR_PTR(-EINVAL); - - user_wr = kmalloc(wqe_size, GFP_KERNEL); - if (!user_wr) - return ERR_PTR(-ENOMEM); - - sg_ind = 0; - last = NULL; - for (i = 0; i < wr_count; ++i) { - if (copy_from_user(user_wr, buf + i * wqe_size, - wqe_size)) { - ret = -EFAULT; - goto err; - } - - if (user_wr->num_sge + sg_ind > sge_count) { - ret = -EINVAL; - goto err; - } - - next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + - user_wr->num_sge * sizeof (struct ib_sge), - GFP_KERNEL); - if (!next) { - ret = -ENOMEM; - goto err; - } - - if (!last) - wr = next; - else - last->next = next; - last = next; - - next->next = NULL; - next->wr_id = user_wr->wr_id; - next->num_sge = user_wr->num_sge; - - if (next->num_sge) { - next->sg_list = (void *)((char *)next + - ALIGN(sizeof *next, sizeof (struct ib_sge))); - if (copy_from_user(next->sg_list, - (const char *)buf + wr_count * wqe_size + - sg_ind * sizeof (struct ib_sge), - next->num_sge * sizeof (struct ib_sge))) { - ret = -EFAULT; - goto err; - } - sg_ind += next->num_sge; - } else - next->sg_list = NULL; - } - - kfree(user_wr); - return wr; - -err: - kfree(user_wr); - - while (wr) { - next = wr->next; - kfree(wr); - wr = next; - } - - return ERR_PTR(ret); -} - -ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_post_recv cmd; - struct ib_uverbs_post_recv_resp resp; - struct ib_recv_wr *wr, *next, *bad_wr; - struct ib_qp *qp; - ssize_t ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, - in_len - sizeof cmd, cmd.wr_count, - cmd.sge_count, cmd.wqe_size); - if (IS_ERR(wr)) - return PTR_ERR(wr); - - qp = idr_read_qp(cmd.qp_handle, file->ucontext); - if (!qp) - goto out; - - resp.bad_wr = 0; - ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); - - put_qp_read(qp); - - if (ret) - for (next = wr; next; next = next->next) { - ++resp.bad_wr; - if (next == bad_wr) - break; - } - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; - -out: - while (wr) { - next = wr->next; - kfree(wr); - wr = next; - } - - return ret ? 
ret : in_len; -} - -ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_post_srq_recv cmd; - struct ib_uverbs_post_srq_recv_resp resp; - struct ib_recv_wr *wr, *next, *bad_wr; - struct ib_srq *srq; - ssize_t ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, - in_len - sizeof cmd, cmd.wr_count, - cmd.sge_count, cmd.wqe_size); - if (IS_ERR(wr)) - return PTR_ERR(wr); - - srq = idr_read_srq(cmd.srq_handle, file->ucontext); - if (!srq) - goto out; - - resp.bad_wr = 0; - ret = srq->device->post_srq_recv(srq, wr, &bad_wr); - - put_srq_read(srq); - - if (ret) - for (next = wr; next; next = next->next) { - ++resp.bad_wr; - if (next == bad_wr) - break; - } - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; - -out: - while (wr) { - next = wr->next; - kfree(wr); - wr = next; - } - - return ret ? ret : in_len; -} - -ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_create_ah cmd; - struct ib_uverbs_create_ah_resp resp; - struct ib_uobject *uobj; - struct ib_pd *pd; - struct ib_ah *ah; - struct ib_ah_attr attr; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); - down_write(&uobj->mutex); - - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - if (!pd) { - ret = -EINVAL; - goto err; - } - - attr.dlid = cmd.attr.dlid; - attr.sl = cmd.attr.sl; - attr.src_path_bits = cmd.attr.src_path_bits; - attr.static_rate = cmd.attr.static_rate; - attr.ah_flags = cmd.attr.is_global ? 
IB_AH_GRH : 0; - attr.port_num = cmd.attr.port_num; - attr.grh.flow_label = cmd.attr.grh.flow_label; - attr.grh.sgid_index = cmd.attr.grh.sgid_index; - attr.grh.hop_limit = cmd.attr.grh.hop_limit; - attr.grh.traffic_class = cmd.attr.grh.traffic_class; - memset(&attr.dmac, 0, sizeof(attr.dmac)); - memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); - - ah = ib_create_ah(pd, &attr); - if (IS_ERR(ah)) { - ret = PTR_ERR(ah); - goto err_put; - } - - ah->uobject = uobj; - uobj->object = ah; - - ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj); - if (ret) - goto err_destroy; - - resp.ah_handle = uobj->id; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; - } - - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->ah_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); - - return in_len; - -err_copy: - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); - -err_destroy: - ib_destroy_ah(ah); - -err_put: - put_pd_read(pd); - -err: - put_uobj_write(uobj); - return ret; -} - -ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, int out_len) -{ - struct ib_uverbs_destroy_ah cmd; - struct ib_ah *ah; - struct ib_uobject *uobj; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext); - if (!uobj) - return -EINVAL; - ah = uobj->object; - - ret = ib_destroy_ah(ah); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; - - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; -} - -ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_attach_mcast cmd; - struct ib_qp *qp; - struct ib_uqp_object *obj; - struct ib_uverbs_mcast_entry *mcast; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - qp = idr_write_qp(cmd.qp_handle, file->ucontext); - if (!qp) - return -EINVAL; - - obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); - - list_for_each_entry(mcast, &obj->mcast_list, list) - if (cmd.mlid == mcast->lid && - !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { - ret = 0; - goto out_put; - } - - mcast = kmalloc(sizeof *mcast, GFP_KERNEL); - if (!mcast) { - ret = -ENOMEM; - goto out_put; - } - - mcast->lid = cmd.mlid; - memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw); - - ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid); - if (!ret) - list_add_tail(&mcast->list, &obj->mcast_list); - else - kfree(mcast); - -out_put: - put_qp_write(qp); - - return ret ? 
ret : in_len; -} - -ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_detach_mcast cmd; - struct ib_uqp_object *obj; - struct ib_qp *qp; - struct ib_uverbs_mcast_entry *mcast; - int ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - qp = idr_write_qp(cmd.qp_handle, file->ucontext); - if (!qp) - return -EINVAL; - - ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); - if (ret) - goto out_put; - - obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); - - list_for_each_entry(mcast, &obj->mcast_list, list) - if (cmd.mlid == mcast->lid && - !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { - list_del(&mcast->list); - kfree(mcast); - break; - } - -out_put: - put_qp_write(qp); - - return ret ? ret : in_len; -} - -static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec) -{ - /* Returns user space filter size, includes padding */ - return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2; -} - -static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size, - u16 ib_real_filter_sz) -{ - /* - * User space filter structures must be 64 bit aligned, otherwise this - * may pass, but we won't handle additional new attributes. - */ - - if (kern_filter_size > ib_real_filter_sz) { - if (memchr_inv((char *)kern_spec_filter + - ib_real_filter_sz, 0, - kern_filter_size - ib_real_filter_sz)) - return -EINVAL; - return ib_real_filter_sz; - } - return kern_filter_size; -} - -static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, - union ib_flow_spec *ib_spec) -{ - ssize_t actual_filter_sz; - ssize_t kern_filter_sz; - ssize_t ib_filter_sz; - void *kern_spec_mask; - void *kern_spec_val; - - if (kern_spec->reserved) - return -EINVAL; - - ib_spec->type = kern_spec->type; - - kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); - /* User flow spec size must be aligned to 4 bytes */ - if (kern_filter_sz != ALIGN(kern_filter_sz, 4)) - return -EINVAL; - - kern_spec_val = (char *)kern_spec + - sizeof(struct ib_uverbs_flow_spec_hdr); - kern_spec_mask = (char *)kern_spec_val + kern_filter_sz; - - switch (ib_spec->type) { - case IB_FLOW_SPEC_ETH: - ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz); - actual_filter_sz = spec_filter_size(kern_spec_mask, - kern_filter_sz, - ib_filter_sz); - if (actual_filter_sz <= 0) - return -EINVAL; - ib_spec->size = sizeof(struct ib_flow_spec_eth); - memcpy(&ib_spec->eth.val, kern_spec_val, actual_filter_sz); - memcpy(&ib_spec->eth.mask, kern_spec_mask, actual_filter_sz); - break; - case IB_FLOW_SPEC_IPV4: - ib_filter_sz = offsetof(struct ib_flow_ipv4_filter, real_sz); - actual_filter_sz = spec_filter_size(kern_spec_mask, - kern_filter_sz, - ib_filter_sz); - if (actual_filter_sz <= 0) - return -EINVAL; - ib_spec->size = sizeof(struct ib_flow_spec_ipv4); - memcpy(&ib_spec->ipv4.val, kern_spec_val, actual_filter_sz); - memcpy(&ib_spec->ipv4.mask, kern_spec_mask, actual_filter_sz); - break; - case IB_FLOW_SPEC_IPV6: - ib_filter_sz = offsetof(struct ib_flow_ipv6_filter, real_sz); - actual_filter_sz = spec_filter_size(kern_spec_mask, - kern_filter_sz, - ib_filter_sz); - if (actual_filter_sz <= 0) - return -EINVAL; - ib_spec->size = sizeof(struct ib_flow_spec_ipv6); - memcpy(&ib_spec->ipv6.val, kern_spec_val, actual_filter_sz); - memcpy(&ib_spec->ipv6.mask, kern_spec_mask, actual_filter_sz); - - if ((ntohl(ib_spec->ipv6.mask.flow_label)) >= BIT(20) || - 
(ntohl(ib_spec->ipv6.val.flow_label)) >= BIT(20))
-			return -EINVAL;
-		break;
-	case IB_FLOW_SPEC_TCP:
-	case IB_FLOW_SPEC_UDP:
-		ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz);
-		actual_filter_sz = spec_filter_size(kern_spec_mask,
-						    kern_filter_sz,
-						    ib_filter_sz);
-		if (actual_filter_sz <= 0)
-			return -EINVAL;
-		ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp);
-		memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz);
-		memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
-		break;
-	default:
-		return -EINVAL;
-	}
-	return 0;
-}
-
-int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
-			   struct ib_udata *ucore,
-			   struct ib_udata *uhw)
-{
-	struct ib_uverbs_ex_create_wq cmd = {};
-	struct ib_uverbs_ex_create_wq_resp resp = {};
-	struct ib_uwq_object *obj;
-	int err = 0;
-	struct ib_cq *cq;
-	struct ib_pd *pd;
-	struct ib_wq *wq;
-	struct ib_wq_init_attr wq_init_attr = {};
-	size_t required_cmd_sz;
-	size_t required_resp_len;
-
-	required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
-	required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
-
-	if (ucore->inlen < required_cmd_sz)
-		return -EINVAL;
-
-	if (ucore->outlen < required_resp_len)
-		return -ENOSPC;
-
-	if (ucore->inlen > sizeof(cmd) &&
-	    !ib_is_udata_cleared(ucore, sizeof(cmd),
-				 ucore->inlen - sizeof(cmd)))
-		return -EOPNOTSUPP;
-
-	err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
-	if (err)
-		return err;
-
-	if (cmd.comp_mask)
-		return -EOPNOTSUPP;
-
-	obj = kmalloc(sizeof(*obj), GFP_KERNEL);
-	if (!obj)
-		return -ENOMEM;
-
-	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext,
-		  &wq_lock_class);
-	down_write(&obj->uevent.uobject.mutex);
-	pd = idr_read_pd(cmd.pd_handle, file->ucontext);
-	if (!pd) {
-		err = -EINVAL;
-		goto err_uobj;
-	}
-
-	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
-	if (!cq) {
-		err = -EINVAL;
-		goto err_put_pd;
-	}
-
-	wq_init_attr.cq = cq;
-	wq_init_attr.max_sge = cmd.max_sge;
-	wq_init_attr.max_wr = cmd.max_wr;
-	wq_init_attr.wq_context = file;
-	wq_init_attr.wq_type = cmd.wq_type;
-	wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
-	obj->uevent.events_reported = 0;
-	INIT_LIST_HEAD(&obj->uevent.event_list);
-	wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
-	if (IS_ERR(wq)) {
-		err = PTR_ERR(wq);
-		goto err_put_cq;
-	}
-
-	wq->uobject = &obj->uevent.uobject;
-	obj->uevent.uobject.object = wq;
-	wq->wq_type = wq_init_attr.wq_type;
-	wq->cq = cq;
-	wq->pd = pd;
-	wq->device = pd->device;
-	wq->wq_context = wq_init_attr.wq_context;
-	atomic_set(&wq->usecnt, 0);
-	atomic_inc(&pd->usecnt);
-	atomic_inc(&cq->usecnt);
-	err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
-	if (err)
-		goto destroy_wq;
-
-	memset(&resp, 0, sizeof(resp));
-	resp.wq_handle = obj->uevent.uobject.id;
-	resp.max_sge = wq_init_attr.max_sge;
-	resp.max_wr = wq_init_attr.max_wr;
-	resp.wqn = wq->wq_num;
-	resp.response_length = required_resp_len;
-	err = ib_copy_to_udata(ucore,
-			       &resp, resp.response_length);
-	if (err)
-		goto err_copy;
-
-	put_pd_read(pd);
-	put_cq_read(cq);
-
-	mutex_lock(&file->mutex);
-	list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list);
-	mutex_unlock(&file->mutex);
-
-	obj->uevent.uobject.live = 1;
-	up_write(&obj->uevent.uobject.mutex);
-	return 0;
-
-err_copy:
-	idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
-destroy_wq:
-	ib_destroy_wq(wq);
-err_put_cq:
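-	/*
-	 * Error unwinding releases resources in reverse order of
-	 * acquisition: drop the CQ and PD references taken by
-	 * idr_read_cq()/idr_read_pd(), then release the write-locked,
-	 * never-published uobject.
-	 */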
put_cq_read(cq); -err_put_pd: - put_pd_read(pd); -err_uobj: - put_uobj_write(&obj->uevent.uobject); - - return err; -} - -int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) -{ - struct ib_uverbs_ex_destroy_wq cmd = {}; - struct ib_uverbs_ex_destroy_wq_resp resp = {}; - struct ib_wq *wq; - struct ib_uobject *uobj; - struct ib_uwq_object *obj; - size_t required_cmd_sz; - size_t required_resp_len; - int ret; - - required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle); - required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); - - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->outlen < required_resp_len) - return -ENOSPC; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); - if (ret) - return ret; - - if (cmd.comp_mask) - return -EOPNOTSUPP; - - resp.response_length = required_resp_len; - uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle, - file->ucontext); - if (!uobj) - return -EINVAL; - - wq = uobj->object; - obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); - ret = ib_destroy_wq(wq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - if (ret) - return ret; - - idr_remove_uobj(&ib_uverbs_wq_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_uevent(file, &obj->uevent); - resp.events_reported = obj->uevent.events_reported; - put_uobj(uobj); - - ret = ib_copy_to_udata(ucore, &resp, resp.response_length); - if (ret) - return ret; - - return 0; -} - -int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) -{ - struct ib_uverbs_ex_modify_wq cmd = {}; - struct ib_wq *wq; - struct ib_wq_attr wq_attr = {}; - size_t required_cmd_sz; - int ret; - - required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state); - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); - if (ret) - return ret; - - if (!cmd.attr_mask) - return -EINVAL; - - if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE)) - return -EINVAL; - - wq = idr_read_wq(cmd.wq_handle, file->ucontext); - if (!wq) - return -EINVAL; - - wq_attr.curr_wq_state = cmd.curr_wq_state; - wq_attr.wq_state = cmd.wq_state; - ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); - put_wq_read(wq); - return ret; -} - -int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) -{ - struct ib_uverbs_ex_create_rwq_ind_table cmd = {}; - struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {}; - struct ib_uobject *uobj; - int err = 0; - struct ib_rwq_ind_table_init_attr init_attr = {}; - struct ib_rwq_ind_table *rwq_ind_tbl; - struct ib_wq **wqs = NULL; - u32 *wqs_handles = NULL; - struct ib_wq *wq = NULL; - int i, j, num_read_wqs; - u32 num_wq_handles; - u32 expected_in_size; - size_t required_cmd_sz_header; - size_t required_resp_len; - - required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); - required_resp_len = offsetof(typeof(resp), 
ind_tbl_num) + sizeof(resp.ind_tbl_num); - - if (ucore->inlen < required_cmd_sz_header) - return -EINVAL; - - if (ucore->outlen < required_resp_len) - return -ENOSPC; - - err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header); - if (err) - return err; - - ucore->inbuf = (const char *)ucore->inbuf + required_cmd_sz_header; - ucore->inlen -= required_cmd_sz_header; - - if (cmd.comp_mask) - return -EOPNOTSUPP; - - if (cmd.log_ind_tbl_size > IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE) - return -EINVAL; - - num_wq_handles = 1 << cmd.log_ind_tbl_size; - expected_in_size = num_wq_handles * sizeof(__u32); - if (num_wq_handles == 1) - /* input size for wq handles is u64 aligned */ - expected_in_size += sizeof(__u32); - - if (ucore->inlen < expected_in_size) - return -EINVAL; - - if (ucore->inlen > expected_in_size && - !ib_is_udata_cleared(ucore, expected_in_size, - ucore->inlen - expected_in_size)) - return -EOPNOTSUPP; - - wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles), - GFP_KERNEL); - if (!wqs_handles) - return -ENOMEM; - - err = ib_copy_from_udata(wqs_handles, ucore, - num_wq_handles * sizeof(__u32)); - if (err) - goto err_free; - - wqs = kcalloc(num_wq_handles, sizeof(*wqs), GFP_KERNEL); - if (!wqs) { - err = -ENOMEM; - goto err_free; - } - - for (num_read_wqs = 0; num_read_wqs < num_wq_handles; - num_read_wqs++) { - wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext); - if (!wq) { - err = -EINVAL; - goto put_wqs; - } - - wqs[num_read_wqs] = wq; - } - - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) { - err = -ENOMEM; - goto put_wqs; - } - - init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class); - down_write(&uobj->mutex); - init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; - init_attr.ind_tbl = wqs; - rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); - - if (IS_ERR(rwq_ind_tbl)) { - err = PTR_ERR(rwq_ind_tbl); - goto err_uobj; - } - - rwq_ind_tbl->ind_tbl = wqs; - rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size; - rwq_ind_tbl->uobject = uobj; - uobj->object = rwq_ind_tbl; - rwq_ind_tbl->device = ib_dev; - atomic_set(&rwq_ind_tbl->usecnt, 0); - - for (i = 0; i < num_wq_handles; i++) - atomic_inc(&wqs[i]->usecnt); - - err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); - if (err) - goto destroy_ind_tbl; - - resp.ind_tbl_handle = uobj->id; - resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; - resp.response_length = required_resp_len; - - err = ib_copy_to_udata(ucore, - &resp, resp.response_length); - if (err) - goto err_copy; - - kfree(wqs_handles); - - for (j = 0; j < num_read_wqs; j++) - put_wq_read(wqs[j]); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); - return 0; - -err_copy: - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); -destroy_ind_tbl: - ib_destroy_rwq_ind_table(rwq_ind_tbl); -err_uobj: - put_uobj_write(uobj); -put_wqs: - for (j = 0; j < num_read_wqs; j++) - put_wq_read(wqs[j]); -err_free: - kfree(wqs_handles); - kfree(wqs); - return err; -} - -int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) -{ - struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; - struct ib_rwq_ind_table *rwq_ind_tbl; - struct ib_uobject *uobj; - int ret; - struct ib_wq **ind_tbl; - size_t required_cmd_sz; - - required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle); - - if (ucore->inlen < 
required_cmd_sz) - return -EINVAL; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); - if (ret) - return ret; - - if (cmd.comp_mask) - return -EOPNOTSUPP; - - uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle, - file->ucontext); - if (!uobj) - return -EINVAL; - rwq_ind_tbl = uobj->object; - ind_tbl = rwq_ind_tbl->ind_tbl; - - ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; - - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - kfree(ind_tbl); - return ret; -} - -int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) -{ - struct ib_uverbs_create_flow cmd; - struct ib_uverbs_create_flow_resp resp; - struct ib_uobject *uobj; - struct ib_flow *flow_id; - struct ib_uverbs_flow_attr *kern_flow_attr; - struct ib_flow_attr *flow_attr; - struct ib_qp *qp; - int err = 0; - void *kern_spec; - void *ib_spec; - int i; - - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; - - if (ucore->outlen < sizeof(resp)) - return -ENOSPC; - - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); - if (err) - return err; - - ucore->inbuf = (const char *)ucore->inbuf + sizeof(cmd); - ucore->inlen -= sizeof(cmd); - - if (cmd.comp_mask) - return -EINVAL; - - if (priv_check(curthread, PRIV_NET_RAW) != 0) - return -EPERM; - - if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED) - return -EINVAL; - - if ((cmd.flow_attr.flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && - ((cmd.flow_attr.type == IB_FLOW_ATTR_ALL_DEFAULT) || - (cmd.flow_attr.type == IB_FLOW_ATTR_MC_DEFAULT))) - return -EINVAL; - - if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) - return -EINVAL; - - if (cmd.flow_attr.size > ucore->inlen || - cmd.flow_attr.size > - (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec))) - return -EINVAL; - - if (cmd.flow_attr.reserved[0] || - cmd.flow_attr.reserved[1]) - return -EINVAL; - - if (cmd.flow_attr.num_of_specs) { - kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size, - GFP_KERNEL); - if (!kern_flow_attr) - return -ENOMEM; - - memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); - err = ib_copy_from_udata(kern_flow_attr + 1, ucore, - cmd.flow_attr.size); - if (err) - goto err_free_attr; - } else { - kern_flow_attr = &cmd.flow_attr; - } - - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) { - err = -ENOMEM; - goto err_free_attr; - } - init_uobj(uobj, 0, file->ucontext, &rule_lock_class); - down_write(&uobj->mutex); - - qp = idr_read_qp(cmd.qp_handle, file->ucontext); - if (!qp) { - err = -EINVAL; - goto err_uobj; - } - - flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs * - sizeof(union ib_flow_spec), GFP_KERNEL); - if (!flow_attr) { - err = -ENOMEM; - goto err_put; - } - - flow_attr->type = kern_flow_attr->type; - flow_attr->priority = kern_flow_attr->priority; - flow_attr->num_of_specs = kern_flow_attr->num_of_specs; - flow_attr->port = kern_flow_attr->port; - flow_attr->flags = kern_flow_attr->flags; - flow_attr->size = sizeof(*flow_attr); - - kern_spec = kern_flow_attr + 1; - ib_spec = flow_attr + 1; - for (i = 0; i < flow_attr->num_of_specs && - cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, 
reserved) && - cmd.flow_attr.size >= - ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) { - err = kern_spec_to_ib_spec(kern_spec, ib_spec); - if (err) - goto err_free; - flow_attr->size += - ((union ib_flow_spec *) ib_spec)->size; - cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size; - kern_spec = (char *)kern_spec + ((struct ib_uverbs_flow_spec *) kern_spec)->size; - ib_spec = (char *)ib_spec + ((union ib_flow_spec *)ib_spec)->size; - } - if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) { - pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n", - i, cmd.flow_attr.size); - err = -EINVAL; - goto err_free; - } - flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); - if (IS_ERR(flow_id)) { - err = PTR_ERR(flow_id); - goto err_free; - } - flow_id->qp = qp; - flow_id->uobject = uobj; - uobj->object = flow_id; - - err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); - if (err) - goto destroy_flow; - - memset(&resp, 0, sizeof(resp)); - resp.flow_handle = uobj->id; - - err = ib_copy_to_udata(ucore, - &resp, sizeof(resp)); - if (err) - goto err_copy; - - put_qp_read(qp); - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->rule_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); - kfree(flow_attr); - if (cmd.flow_attr.num_of_specs) - kfree(kern_flow_attr); - return 0; -err_copy: - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); -destroy_flow: - ib_destroy_flow(flow_id); -err_free: - kfree(flow_attr); -err_put: - put_qp_read(qp); -err_uobj: - put_uobj_write(uobj); -err_free_attr: - if (cmd.flow_attr.num_of_specs) - kfree(kern_flow_attr); - return err; -} - -int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) -{ - struct ib_uverbs_destroy_flow cmd; - struct ib_flow *flow_id; - struct ib_uobject *uobj; - int ret; - - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; - - ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); - if (ret) - return ret; - - if (cmd.comp_mask) - return -EINVAL; - - uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, - file->ucontext); - if (!uobj) - return -EINVAL; - flow_id = uobj->object; - - ret = ib_destroy_flow(flow_id); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return ret; -} - -static int __uverbs_create_xsrq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_uverbs_create_xsrq *cmd, - struct ib_udata *udata) -{ - struct ib_uverbs_create_srq_resp resp; - struct ib_usrq_object *obj; - struct ib_pd *pd; - struct ib_srq *srq; - struct ib_uobject *uninitialized_var(xrcd_uobj); - struct ib_srq_init_attr attr; - int ret; - - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; - - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); - down_write(&obj->uevent.uobject.mutex); - - if (cmd->srq_type == IB_SRQT_XRC) { - attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); - if (!attr.ext.xrc.xrcd) { - ret = -EINVAL; - goto err; - } - - obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); - atomic_inc(&obj->uxrcd->refcnt); - - attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); - if (!attr.ext.xrc.cq) { - ret = -EINVAL; - goto err_put_xrcd; - } - } - - pd = idr_read_pd(cmd->pd_handle, 
file->ucontext); - if (!pd) { - ret = -EINVAL; - goto err_put_cq; - } - - attr.event_handler = ib_uverbs_srq_event_handler; - attr.srq_context = file; - attr.srq_type = cmd->srq_type; - attr.attr.max_wr = cmd->max_wr; - attr.attr.max_sge = cmd->max_sge; - attr.attr.srq_limit = cmd->srq_limit; - - obj->uevent.events_reported = 0; - INIT_LIST_HEAD(&obj->uevent.event_list); - - srq = pd->device->create_srq(pd, &attr, udata); - if (IS_ERR(srq)) { - ret = PTR_ERR(srq); - goto err_put; - } - - srq->device = pd->device; - srq->pd = pd; - srq->srq_type = cmd->srq_type; - srq->uobject = &obj->uevent.uobject; - srq->event_handler = attr.event_handler; - srq->srq_context = attr.srq_context; - - if (cmd->srq_type == IB_SRQT_XRC) { - srq->ext.xrc.cq = attr.ext.xrc.cq; - srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; - atomic_inc(&attr.ext.xrc.cq->usecnt); - atomic_inc(&attr.ext.xrc.xrcd->usecnt); - } - - atomic_inc(&pd->usecnt); - atomic_set(&srq->usecnt, 0); - - obj->uevent.uobject.object = srq; - ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); - if (ret) - goto err_destroy; - - memset(&resp, 0, sizeof resp); - resp.srq_handle = obj->uevent.uobject.id; - resp.max_wr = attr.attr.max_wr; - resp.max_sge = attr.attr.max_sge; - if (cmd->srq_type == IB_SRQT_XRC) - resp.srqn = srq->ext.xrc.srq_num; - - if (copy_to_user((void __user *) (unsigned long) cmd->response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; - } - - if (cmd->srq_type == IB_SRQT_XRC) { - put_uobj_read(xrcd_uobj); - put_cq_read(attr.ext.xrc.cq); - } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; - - up_write(&obj->uevent.uobject.mutex); - - return 0; - -err_copy: - idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); - -err_destroy: - ib_destroy_srq(srq); - -err_put: - put_pd_read(pd); - -err_put_cq: - if (cmd->srq_type == IB_SRQT_XRC) - put_cq_read(attr.ext.xrc.cq); - -err_put_xrcd: - if (cmd->srq_type == IB_SRQT_XRC) { - atomic_dec(&obj->uxrcd->refcnt); - put_uobj_read(xrcd_uobj); - } - -err: - put_uobj_write(&obj->uevent.uobject); - return ret; -} - -ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_create_srq cmd; - struct ib_uverbs_create_xsrq xcmd; - struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - xcmd.response = cmd.response; - xcmd.user_handle = cmd.user_handle; - xcmd.srq_type = IB_SRQT_BASIC; - xcmd.pd_handle = cmd.pd_handle; - xcmd.max_wr = cmd.max_wr; - xcmd.max_sge = cmd.max_sge; - xcmd.srq_limit = cmd.srq_limit; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof resp); - - ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); - if (ret) - return ret; - - return in_len; -} - -ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, int out_len) -{ - struct ib_uverbs_create_xsrq cmd; - struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) 
cmd.response + sizeof resp, - in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof resp); - - ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); - if (ret) - return ret; - - return in_len; -} - -ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_modify_srq cmd; - struct ib_udata udata; - struct ib_srq *srq; - struct ib_srq_attr attr; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, - out_len); - - srq = idr_read_srq(cmd.srq_handle, file->ucontext); - if (!srq) - return -EINVAL; - - attr.max_wr = cmd.max_wr; - attr.srq_limit = cmd.srq_limit; - - ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); - - put_srq_read(srq); - - return ret ? ret : in_len; -} - -ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_srq cmd; - struct ib_uverbs_query_srq_resp resp; - struct ib_srq_attr attr; - struct ib_srq *srq; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - srq = idr_read_srq(cmd.srq_handle, file->ucontext); - if (!srq) - return -EINVAL; - - ret = ib_query_srq(srq, &attr); - - put_srq_read(srq); - - if (ret) - return ret; - - memset(&resp, 0, sizeof resp); - - resp.max_wr = attr.max_wr; - resp.max_sge = attr.max_sge; - resp.srq_limit = attr.srq_limit; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - -ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_destroy_srq cmd; - struct ib_uverbs_destroy_srq_resp resp; - struct ib_uobject *uobj; - struct ib_srq *srq; - struct ib_uevent_object *obj; - int ret = -EINVAL; - struct ib_usrq_object *us; - enum ib_srq_type srq_type; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext); - if (!uobj) - return -EINVAL; - srq = uobj->object; - obj = container_of(uobj, struct ib_uevent_object, uobject); - srq_type = srq->srq_type; - - ret = ib_destroy_srq(srq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; - - if (srq_type == IB_SRQT_XRC) { - us = container_of(obj, struct ib_usrq_object, uevent); - atomic_dec(&us->uxrcd->refcnt); - } - - idr_remove_uobj(&ib_uverbs_srq_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_uevent(file, obj); - - memset(&resp, 0, sizeof resp); - resp.events_reported = obj->events_reported; - - put_uobj(uobj); - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; - - return ret ? 
ret : in_len; -} - -int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) -{ - struct ib_uverbs_ex_query_device_resp resp = { {0} }; - struct ib_uverbs_ex_query_device cmd; - struct ib_device_attr attr = {0}; - int err; - - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; - - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); - if (err) - return err; - - if (cmd.comp_mask) - return -EINVAL; - - if (cmd.reserved) - return -EINVAL; - - resp.response_length = offsetof(typeof(resp), odp_caps); - - if (ucore->outlen < resp.response_length) - return -ENOSPC; - - err = ib_dev->query_device(ib_dev, &attr, uhw); - if (err) - return err; - - copy_query_dev_fields(file, ib_dev, &resp.base, &attr); - - if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) - goto end; - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - resp.odp_caps.general_caps = attr.odp_caps.general_caps; - resp.odp_caps.per_transport_caps.rc_odp_caps = - attr.odp_caps.per_transport_caps.rc_odp_caps; - resp.odp_caps.per_transport_caps.uc_odp_caps = - attr.odp_caps.per_transport_caps.uc_odp_caps; - resp.odp_caps.per_transport_caps.ud_odp_caps = - attr.odp_caps.per_transport_caps.ud_odp_caps; -#endif - resp.response_length += sizeof(resp.odp_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask)) - goto end; - - resp.timestamp_mask = attr.timestamp_mask; - resp.response_length += sizeof(resp.timestamp_mask); - - if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock)) - goto end; - - resp.hca_core_clock = attr.hca_core_clock; - resp.response_length += sizeof(resp.hca_core_clock); - - if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex)) - goto end; - - resp.device_cap_flags_ex = attr.device_cap_flags; - resp.response_length += sizeof(resp.device_cap_flags_ex); - - if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps)) - goto end; - - resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts; - resp.rss_caps.max_rwq_indirection_tables = - attr.rss_caps.max_rwq_indirection_tables; - resp.rss_caps.max_rwq_indirection_table_size = - attr.rss_caps.max_rwq_indirection_table_size; - - resp.response_length += sizeof(resp.rss_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq)) - goto end; - - resp.max_wq_type_rq = attr.max_wq_type_rq; - resp.response_length += sizeof(resp.max_wq_type_rq); -end: - err = ib_copy_to_udata(ucore, &resp, resp.response_length); - return err; -} Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs_cmd.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_agent.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_agent.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_agent.c (revision 320592) @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. + * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. 
All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include + +#include "agent.h" +#include "smi.h" +#include "mad_priv.h" + +#define SPFX "ib_agent: " + +struct ib_agent_port_private { + struct list_head port_list; + struct ib_mad_agent *agent[2]; +}; + +static DEFINE_SPINLOCK(ib_agent_port_list_lock); +static LIST_HEAD(ib_agent_port_list); + +static struct ib_agent_port_private * +__ib_get_agent_port(const struct ib_device *device, int port_num) +{ + struct ib_agent_port_private *entry; + + list_for_each_entry(entry, &ib_agent_port_list, port_list) { + if (entry->agent[1]->device == device && + entry->agent[1]->port_num == port_num) + return entry; + } + return NULL; +} + +static struct ib_agent_port_private * +ib_get_agent_port(const struct ib_device *device, int port_num) +{ + struct ib_agent_port_private *entry; + unsigned long flags; + + spin_lock_irqsave(&ib_agent_port_list_lock, flags); + entry = __ib_get_agent_port(device, port_num); + spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); + return entry; +} + +void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *grh, + const struct ib_wc *wc, const struct ib_device *device, + int port_num, int qpn, size_t resp_mad_len, bool opa) +{ + struct ib_agent_port_private *port_priv; + struct ib_mad_agent *agent; + struct ib_mad_send_buf *send_buf; + struct ib_ah *ah; + struct ib_mad_send_wr_private *mad_send_wr; + + if (rdma_cap_ib_switch(device)) + port_priv = ib_get_agent_port(device, 0); + else + port_priv = ib_get_agent_port(device, port_num); + + if (!port_priv) { + dev_err(&device->dev, "Unable to find port agent\n"); + return; + } + + agent = port_priv->agent[qpn]; + ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num); + if (IS_ERR(ah)) { + dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n", + PTR_ERR(ah)); + return; + } + + if (opa && mad_hdr->base_version != OPA_MGMT_BASE_VERSION) + resp_mad_len = IB_MGMT_MAD_SIZE; + + send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0, + IB_MGMT_MAD_HDR, + resp_mad_len - IB_MGMT_MAD_HDR, + GFP_KERNEL, + mad_hdr->base_version); + if (IS_ERR(send_buf)) { + dev_err(&device->dev, "ib_create_send_mad error\n"); + goto err1; + } + + 
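/* send_buf was sized above for resp_mad_len (IB_MGMT_MAD_HDR of header plus the remaining payload), so the whole response MAD can be copied in one memcpy. */ +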
memcpy(send_buf->mad, mad_hdr, resp_mad_len); + send_buf->ah = ah; + + if (rdma_cap_ib_switch(device)) { + mad_send_wr = container_of(send_buf, + struct ib_mad_send_wr_private, + send_buf); + mad_send_wr->send_wr.port_num = port_num; + } + + if (ib_post_send_mad(send_buf, NULL)) { + dev_err(&device->dev, "ib_post_send_mad error\n"); + goto err2; + } + return; +err2: + ib_free_send_mad(send_buf); +err1: + ib_destroy_ah(ah); +} + +static void agent_send_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_send_wc) +{ + ib_destroy_ah(mad_send_wc->send_buf->ah); + ib_free_send_mad(mad_send_wc->send_buf); +} + +int ib_agent_port_open(struct ib_device *device, int port_num) +{ + struct ib_agent_port_private *port_priv; + unsigned long flags; + int ret; + + /* Create new device info */ + port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); + if (!port_priv) { + dev_err(&device->dev, "No memory for ib_agent_port_private\n"); + ret = -ENOMEM; + goto error1; + } + + if (rdma_cap_ib_smi(device, port_num)) { + /* Obtain send only MAD agent for SMI QP */ + port_priv->agent[0] = ib_register_mad_agent(device, port_num, + IB_QPT_SMI, NULL, 0, + &agent_send_handler, + NULL, NULL, 0); + if (IS_ERR(port_priv->agent[0])) { + ret = PTR_ERR(port_priv->agent[0]); + goto error2; + } + } + + /* Obtain send only MAD agent for GSI QP */ + port_priv->agent[1] = ib_register_mad_agent(device, port_num, + IB_QPT_GSI, NULL, 0, + &agent_send_handler, + NULL, NULL, 0); + if (IS_ERR(port_priv->agent[1])) { + ret = PTR_ERR(port_priv->agent[1]); + goto error3; + } + + spin_lock_irqsave(&ib_agent_port_list_lock, flags); + list_add_tail(&port_priv->port_list, &ib_agent_port_list); + spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); + + return 0; + +error3: + if (port_priv->agent[0]) + ib_unregister_mad_agent(port_priv->agent[0]); +error2: + kfree(port_priv); +error1: + return ret; +} + +int ib_agent_port_close(struct ib_device *device, int port_num) +{ + struct ib_agent_port_private *port_priv; + unsigned long flags; + + spin_lock_irqsave(&ib_agent_port_list_lock, flags); + port_priv = __ib_get_agent_port(device, port_num); + if (port_priv == NULL) { + spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); + dev_err(&device->dev, "Port %d not found\n", port_num); + return -ENODEV; + } + list_del(&port_priv->port_list); + spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); + + ib_unregister_mad_agent(port_priv->agent[1]); + if (port_priv->agent[0]) + ib_unregister_mad_agent(port_priv->agent[0]); + + kfree(port_priv); + return 0; +} Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_agent.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_cm.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_cm.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_cm.c (revision 320592) @@ -0,0 +1,4141 @@ +/* + * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include "cm_msgs.h" + +MODULE_AUTHOR("Sean Hefty"); +MODULE_DESCRIPTION("InfiniBand CM"); +MODULE_LICENSE("Dual BSD/GPL"); + +static void cm_add_one(struct ib_device *device); +static void cm_remove_one(struct ib_device *device, void *client_data); + +static struct ib_client cm_client = { + .name = "cm", + .add = cm_add_one, + .remove = cm_remove_one +}; + +static struct ib_cm { + spinlock_t lock; + struct list_head device_list; + rwlock_t device_lock; + struct rb_root listen_service_table; + u64 listen_service_id; + /* struct rb_root peer_service_table; todo: fix peer to peer */ + struct rb_root remote_qp_table; + struct rb_root remote_id_table; + struct rb_root remote_sidr_table; + struct idr local_id_table; + __be32 random_id_operand; + struct list_head timewait_list; + struct workqueue_struct *wq; + /* Sync on cm change port state */ + spinlock_t state_lock; +} cm; + +/* Counter indexes ordered by attribute ID */ +enum { + CM_REQ_COUNTER, + CM_MRA_COUNTER, + CM_REJ_COUNTER, + CM_REP_COUNTER, + CM_RTU_COUNTER, + CM_DREQ_COUNTER, + CM_DREP_COUNTER, + CM_SIDR_REQ_COUNTER, + CM_SIDR_REP_COUNTER, + CM_LAP_COUNTER, + CM_APR_COUNTER, + CM_ATTR_COUNT, + CM_ATTR_ID_OFFSET = 0x0010, +}; + +enum { + CM_XMIT, + CM_XMIT_RETRIES, + CM_RECV, + CM_RECV_DUPLICATES, + CM_COUNTER_GROUPS +}; + +static char const counter_group_names[CM_COUNTER_GROUPS] + [sizeof("cm_rx_duplicates")] = { + "cm_tx_msgs", "cm_tx_retries", + "cm_rx_msgs", "cm_rx_duplicates" +}; + +struct cm_counter_group { + struct kobject obj; + atomic_long_t counter[CM_ATTR_COUNT]; +}; + +struct cm_counter_attribute { + struct attribute attr; + int index; +}; + +#define CM_COUNTER_ATTR(_name, _index) \ +struct cm_counter_attribute cm_##_name##_counter_attr = { \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .index = _index \ +} + +static CM_COUNTER_ATTR(req, CM_REQ_COUNTER); +static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER); +static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER); +static CM_COUNTER_ATTR(rep, CM_REP_COUNTER); +static 
CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER); +static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER); +static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER); +static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER); +static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER); +static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER); +static CM_COUNTER_ATTR(apr, CM_APR_COUNTER); + +static struct attribute *cm_counter_default_attrs[] = { + &cm_req_counter_attr.attr, + &cm_mra_counter_attr.attr, + &cm_rej_counter_attr.attr, + &cm_rep_counter_attr.attr, + &cm_rtu_counter_attr.attr, + &cm_dreq_counter_attr.attr, + &cm_drep_counter_attr.attr, + &cm_sidr_req_counter_attr.attr, + &cm_sidr_rep_counter_attr.attr, + &cm_lap_counter_attr.attr, + &cm_apr_counter_attr.attr, + NULL +}; + +struct cm_port { + struct cm_device *cm_dev; + struct ib_mad_agent *mad_agent; + struct kobject port_obj; + u8 port_num; + struct list_head cm_priv_prim_list; + struct list_head cm_priv_altr_list; + struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; +}; + +struct cm_device { + struct list_head list; + struct ib_device *ib_device; + struct device *device; + u8 ack_delay; + int going_down; + struct cm_port *port[0]; +}; + +struct cm_av { + struct cm_port *port; + union ib_gid dgid; + struct ib_ah_attr ah_attr; + u16 pkey_index; + u8 timeout; +}; + +struct cm_work { + struct delayed_work work; + struct list_head list; + struct cm_port *port; + struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */ + __be32 local_id; /* Established / timewait */ + __be32 remote_id; + struct ib_cm_event cm_event; + struct ib_sa_path_rec path[0]; +}; + +struct cm_timewait_info { + struct cm_work work; /* Must be first. */ + struct list_head list; + struct rb_node remote_qp_node; + struct rb_node remote_id_node; + __be64 remote_ca_guid; + __be32 remote_qpn; + u8 inserted_remote_qp; + u8 inserted_remote_id; +}; + +struct cm_id_private { + struct ib_cm_id id; + + struct rb_node service_node; + struct rb_node sidr_id_node; + spinlock_t lock; /* Do not acquire inside cm.lock */ + struct completion comp; + atomic_t refcount; + /* Number of clients sharing this ib_cm_id. Only valid for listeners. + * Protected by the cm.lock spinlock. 
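+	 * A sharecount greater than one means ib_cm_insert_listen() found
+	 * and reused an existing listener rather than inserting a new one.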
*/ + int listen_sharecount; + + struct ib_mad_send_buf *msg; + struct cm_timewait_info *timewait_info; + /* todo: use alternate port on send failure */ + struct cm_av av; + struct cm_av alt_av; + + void *private_data; + __be64 tid; + __be32 local_qpn; + __be32 remote_qpn; + enum ib_qp_type qp_type; + __be32 sq_psn; + __be32 rq_psn; + int timeout_ms; + enum ib_mtu path_mtu; + __be16 pkey; + u8 private_data_len; + u8 max_cm_retries; + u8 peer_to_peer; + u8 responder_resources; + u8 initiator_depth; + u8 retry_count; + u8 rnr_retry_count; + u8 service_timeout; + u8 target_ack_delay; + + struct list_head prim_list; + struct list_head altr_list; + /* Indicates that the send port mad is registered and av is set */ + int prim_send_port_not_ready; + int altr_send_port_not_ready; + + struct list_head work_list; + atomic_t work_count; +}; + +static void cm_work_handler(struct work_struct *work); + +static inline void cm_deref_id(struct cm_id_private *cm_id_priv) +{ + if (atomic_dec_and_test(&cm_id_priv->refcount)) + complete(&cm_id_priv->comp); +} + +static int cm_alloc_msg(struct cm_id_private *cm_id_priv, + struct ib_mad_send_buf **msg) +{ + struct ib_mad_agent *mad_agent; + struct ib_mad_send_buf *m; + struct ib_ah *ah; + struct cm_av *av; + unsigned long flags, flags2; + int ret = 0; + + /* don't let the port be released till the agent is down */ + spin_lock_irqsave(&cm.state_lock, flags2); + spin_lock_irqsave(&cm.lock, flags); + if (!cm_id_priv->prim_send_port_not_ready) + av = &cm_id_priv->av; + else if (!cm_id_priv->altr_send_port_not_ready && + (cm_id_priv->alt_av.port)) + av = &cm_id_priv->alt_av; + else { + pr_info("%s: not a valid CM id\n", __func__); + ret = -ENODEV; + spin_unlock_irqrestore(&cm.lock, flags); + goto out; + } + spin_unlock_irqrestore(&cm.lock, flags); + /* Make sure the port hasn't released the mad agent yet */ + mad_agent = cm_id_priv->av.port->mad_agent; + if (!mad_agent) { + pr_info("%s: not a valid MAD agent\n", __func__); + ret = -ENODEV; + goto out; + } + ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + goto out; + } + + m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, + av->pkey_index, + 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, + GFP_ATOMIC, + IB_MGMT_BASE_VERSION); + if (IS_ERR(m)) { + ib_destroy_ah(ah); + ret = PTR_ERR(m); + goto out; + } + + /* Timeout set by caller if response is expected.
*/ + m->ah = ah; + m->retries = cm_id_priv->max_cm_retries; + + atomic_inc(&cm_id_priv->refcount); + m->context[0] = cm_id_priv; + *msg = m; + +out: + spin_unlock_irqrestore(&cm.state_lock, flags2); + return ret; +} + +static int cm_alloc_response_msg(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc, + struct ib_mad_send_buf **msg) +{ + struct ib_mad_send_buf *m; + struct ib_ah *ah; + + ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc, + mad_recv_wc->recv_buf.grh, port->port_num); + if (IS_ERR(ah)) + return PTR_ERR(ah); + + m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, + 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, + GFP_ATOMIC, + IB_MGMT_BASE_VERSION); + if (IS_ERR(m)) { + ib_destroy_ah(ah); + return PTR_ERR(m); + } + m->ah = ah; + *msg = m; + return 0; +} + +static void cm_free_msg(struct ib_mad_send_buf *msg) +{ + ib_destroy_ah(msg->ah); + if (msg->context[0]) + cm_deref_id(msg->context[0]); + ib_free_send_mad(msg); +} + +static void * cm_copy_private_data(const void *private_data, + u8 private_data_len) +{ + void *data; + + if (!private_data || !private_data_len) + return NULL; + + data = kmemdup(private_data, private_data_len, GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + + return data; +} + +static void cm_set_private_data(struct cm_id_private *cm_id_priv, + void *private_data, u8 private_data_len) +{ + if (cm_id_priv->private_data && cm_id_priv->private_data_len) + kfree(cm_id_priv->private_data); + + cm_id_priv->private_data = private_data; + cm_id_priv->private_data_len = private_data_len; +} + +static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, + struct ib_grh *grh, struct cm_av *av) +{ + av->port = port; + av->pkey_index = wc->pkey_index; + ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc, + grh, &av->ah_attr); +} + +static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av, + struct cm_id_private *cm_id_priv) +{ + struct cm_device *cm_dev; + struct cm_port *port = NULL; + unsigned long flags; + int ret; + u8 p; + struct net_device *ndev = ib_get_ndev_from_path(path); + + read_lock_irqsave(&cm.device_lock, flags); + list_for_each_entry(cm_dev, &cm.device_list, list) { + if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, + path->gid_type, ndev, &p, NULL)) { + port = cm_dev->port[p-1]; + break; + } + } + read_unlock_irqrestore(&cm.device_lock, flags); + + if (ndev) + dev_put(ndev); + + if (!port) + return -EINVAL; + + ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num, + be16_to_cpu(path->pkey), &av->pkey_index); + if (ret) + return ret; + + av->port = port; + ret = ib_init_ah_from_path(cm_dev->ib_device, port->port_num, + path, &av->ah_attr); + if (ret) + return ret; + + av->timeout = path->packet_life_time + 1; + + spin_lock_irqsave(&cm.lock, flags); + if (&cm_id_priv->av == av) + list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); + else if (&cm_id_priv->alt_av == av) + list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); + else + ret = -EINVAL; + + spin_unlock_irqrestore(&cm.lock, flags); + + return ret; +} + +static int cm_alloc_id(struct cm_id_private *cm_id_priv) +{ + unsigned long flags; + int id; + + idr_preload(GFP_KERNEL); + spin_lock_irqsave(&cm.lock, flags); + + id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT); + + spin_unlock_irqrestore(&cm.lock, flags); + idr_preload_end(); + + cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; + return id < 0 ? 
id : 0; +} + +static void cm_free_id(__be32 local_id) +{ + spin_lock_irq(&cm.lock); + idr_remove(&cm.local_id_table, + (__force int) (local_id ^ cm.random_id_operand)); + spin_unlock_irq(&cm.lock); +} + +static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) +{ + struct cm_id_private *cm_id_priv; + + cm_id_priv = idr_find(&cm.local_id_table, + (__force int) (local_id ^ cm.random_id_operand)); + if (cm_id_priv) { + if (cm_id_priv->id.remote_id == remote_id) + atomic_inc(&cm_id_priv->refcount); + else + cm_id_priv = NULL; + } + + return cm_id_priv; +} + +static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) +{ + struct cm_id_private *cm_id_priv; + + spin_lock_irq(&cm.lock); + cm_id_priv = cm_get_id(local_id, remote_id); + spin_unlock_irq(&cm.lock); + + return cm_id_priv; +} + +/* + * Trivial helpers to strip endian annotation and compare; the + * endianness doesn't actually matter since we just need a stable + * order for the RB tree. + */ +static int be32_lt(__be32 a, __be32 b) +{ + return (__force u32) a < (__force u32) b; +} + +static int be32_gt(__be32 a, __be32 b) +{ + return (__force u32) a > (__force u32) b; +} + +static int be64_lt(__be64 a, __be64 b) +{ + return (__force u64) a < (__force u64) b; +} + +static int be64_gt(__be64 a, __be64 b) +{ + return (__force u64) a > (__force u64) b; +} + +static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) +{ + struct rb_node **link = &cm.listen_service_table.rb_node; + struct rb_node *parent = NULL; + struct cm_id_private *cur_cm_id_priv; + __be64 service_id = cm_id_priv->id.service_id; + __be64 service_mask = cm_id_priv->id.service_mask; + + while (*link) { + parent = *link; + cur_cm_id_priv = rb_entry(parent, struct cm_id_private, + service_node); + if ((cur_cm_id_priv->id.service_mask & service_id) == + (service_mask & cur_cm_id_priv->id.service_id) && + (cm_id_priv->id.device == cur_cm_id_priv->id.device)) + return cur_cm_id_priv; + + if (cm_id_priv->id.device < cur_cm_id_priv->id.device) + link = &(*link)->rb_left; + else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) + link = &(*link)->rb_right; + else if (be64_lt(service_id, cur_cm_id_priv->id.service_id)) + link = &(*link)->rb_left; + else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) + link = &(*link)->rb_right; + else + link = &(*link)->rb_right; + } + rb_link_node(&cm_id_priv->service_node, parent, link); + rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table); + return NULL; +} + +static struct cm_id_private * cm_find_listen(struct ib_device *device, + __be64 service_id) +{ + struct rb_node *node = cm.listen_service_table.rb_node; + struct cm_id_private *cm_id_priv; + + while (node) { + cm_id_priv = rb_entry(node, struct cm_id_private, service_node); + if ((cm_id_priv->id.service_mask & service_id) == + cm_id_priv->id.service_id && + (cm_id_priv->id.device == device)) + return cm_id_priv; + + if (device < cm_id_priv->id.device) + node = node->rb_left; + else if (device > cm_id_priv->id.device) + node = node->rb_right; + else if (be64_lt(service_id, cm_id_priv->id.service_id)) + node = node->rb_left; + else if (be64_gt(service_id, cm_id_priv->id.service_id)) + node = node->rb_right; + else + node = node->rb_right; + } + return NULL; +} + +static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info + *timewait_info) +{ + struct rb_node **link = &cm.remote_id_table.rb_node; + struct rb_node *parent = NULL; + struct cm_timewait_info *cur_timewait_info; + __be64 
remote_ca_guid = timewait_info->remote_ca_guid; + __be32 remote_id = timewait_info->work.remote_id; + + while (*link) { + parent = *link; + cur_timewait_info = rb_entry(parent, struct cm_timewait_info, + remote_id_node); + if (be32_lt(remote_id, cur_timewait_info->work.remote_id)) + link = &(*link)->rb_left; + else if (be32_gt(remote_id, cur_timewait_info->work.remote_id)) + link = &(*link)->rb_right; + else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) + link = &(*link)->rb_left; + else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) + link = &(*link)->rb_right; + else + return cur_timewait_info; + } + timewait_info->inserted_remote_id = 1; + rb_link_node(&timewait_info->remote_id_node, parent, link); + rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table); + return NULL; +} + +static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid, + __be32 remote_id) +{ + struct rb_node *node = cm.remote_id_table.rb_node; + struct cm_timewait_info *timewait_info; + + while (node) { + timewait_info = rb_entry(node, struct cm_timewait_info, + remote_id_node); + if (be32_lt(remote_id, timewait_info->work.remote_id)) + node = node->rb_left; + else if (be32_gt(remote_id, timewait_info->work.remote_id)) + node = node->rb_right; + else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid)) + node = node->rb_left; + else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid)) + node = node->rb_right; + else + return timewait_info; + } + return NULL; +} + +static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info + *timewait_info) +{ + struct rb_node **link = &cm.remote_qp_table.rb_node; + struct rb_node *parent = NULL; + struct cm_timewait_info *cur_timewait_info; + __be64 remote_ca_guid = timewait_info->remote_ca_guid; + __be32 remote_qpn = timewait_info->remote_qpn; + + while (*link) { + parent = *link; + cur_timewait_info = rb_entry(parent, struct cm_timewait_info, + remote_qp_node); + if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn)) + link = &(*link)->rb_left; + else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn)) + link = &(*link)->rb_right; + else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) + link = &(*link)->rb_left; + else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) + link = &(*link)->rb_right; + else + return cur_timewait_info; + } + timewait_info->inserted_remote_qp = 1; + rb_link_node(&timewait_info->remote_qp_node, parent, link); + rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table); + return NULL; +} + +static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private + *cm_id_priv) +{ + struct rb_node **link = &cm.remote_sidr_table.rb_node; + struct rb_node *parent = NULL; + struct cm_id_private *cur_cm_id_priv; + union ib_gid *port_gid = &cm_id_priv->av.dgid; + __be32 remote_id = cm_id_priv->id.remote_id; + + while (*link) { + parent = *link; + cur_cm_id_priv = rb_entry(parent, struct cm_id_private, + sidr_id_node); + if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id)) + link = &(*link)->rb_left; + else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id)) + link = &(*link)->rb_right; + else { + int cmp; + cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid, + sizeof *port_gid); + if (cmp < 0) + link = &(*link)->rb_left; + else if (cmp > 0) + link = &(*link)->rb_right; + else + return cur_cm_id_priv; + } + } + rb_link_node(&cm_id_priv->sidr_id_node, parent, link); + rb_insert_color(&cm_id_priv->sidr_id_node, 
&cm.remote_sidr_table); + return NULL; +} + +static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, + enum ib_cm_sidr_status status) +{ + struct ib_cm_sidr_rep_param param; + + memset(&param, 0, sizeof param); + param.status = status; + ib_send_cm_sidr_rep(&cm_id_priv->id, &param); +} + +struct ib_cm_id *ib_create_cm_id(struct ib_device *device, + ib_cm_handler cm_handler, + void *context) +{ + struct cm_id_private *cm_id_priv; + int ret; + + cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL); + if (!cm_id_priv) + return ERR_PTR(-ENOMEM); + + cm_id_priv->id.state = IB_CM_IDLE; + cm_id_priv->id.device = device; + cm_id_priv->id.cm_handler = cm_handler; + cm_id_priv->id.context = context; + cm_id_priv->id.remote_cm_qpn = 1; + ret = cm_alloc_id(cm_id_priv); + if (ret) + goto error; + + spin_lock_init(&cm_id_priv->lock); + init_completion(&cm_id_priv->comp); + INIT_LIST_HEAD(&cm_id_priv->work_list); + INIT_LIST_HEAD(&cm_id_priv->prim_list); + INIT_LIST_HEAD(&cm_id_priv->altr_list); + atomic_set(&cm_id_priv->work_count, -1); + atomic_set(&cm_id_priv->refcount, 1); + return &cm_id_priv->id; + +error: + kfree(cm_id_priv); + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL(ib_create_cm_id); + +static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv) +{ + struct cm_work *work; + + if (list_empty(&cm_id_priv->work_list)) + return NULL; + + work = list_entry(cm_id_priv->work_list.next, struct cm_work, list); + list_del(&work->list); + return work; +} + +static void cm_free_work(struct cm_work *work) +{ + if (work->mad_recv_wc) + ib_free_recv_mad(work->mad_recv_wc); + kfree(work); +} + +static inline int cm_convert_to_ms(int iba_time) +{ + /* approximate conversion to ms from 4.096us x 2^iba_time */ + return 1 << max(iba_time - 8, 0); +} + +/* + * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time + * Because of how ack_timeout is stored, adding one doubles the timeout. + * To avoid large timeouts, select the max(ack_delay, life_time + 1), and + * increment it (round up) only if the other is within 50%.
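+ * For example (values chosen purely for illustration): packet_life_time = 14
+ * and ca_ack_delay = 15 give a starting ack_timeout of 15; since 15 >= 15
+ * and ca_ack_delay (15) >= 14, one is added and 16 is returned. With
+ * ca_ack_delay = 10 instead, 10 < 14, so nothing is added and 15 is returned.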
+ */ +static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time) +{ + int ack_timeout = packet_life_time + 1; + + if (ack_timeout >= ca_ack_delay) + ack_timeout += (ca_ack_delay >= (ack_timeout - 1)); + else + ack_timeout = ca_ack_delay + + (ack_timeout >= (ca_ack_delay - 1)); + + return min(31, ack_timeout); +} + +static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) +{ + if (timewait_info->inserted_remote_id) { + rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); + timewait_info->inserted_remote_id = 0; + } + + if (timewait_info->inserted_remote_qp) { + rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); + timewait_info->inserted_remote_qp = 0; + } +} + +static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) +{ + struct cm_timewait_info *timewait_info; + + timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL); + if (!timewait_info) + return ERR_PTR(-ENOMEM); + + timewait_info->work.local_id = local_id; + INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler); + timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT; + return timewait_info; +} + +static void cm_enter_timewait(struct cm_id_private *cm_id_priv) +{ + int wait_time; + unsigned long flags; + struct cm_device *cm_dev; + + cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client); + if (!cm_dev) + return; + + spin_lock_irqsave(&cm.lock, flags); + cm_cleanup_timewait(cm_id_priv->timewait_info); + list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list); + spin_unlock_irqrestore(&cm.lock, flags); + + /* + * The cm_id could be destroyed by the user before we exit timewait. + * To protect against this, we search for the cm_id after exiting + * timewait before notifying the user that we've exited timewait. + */ + cm_id_priv->id.state = IB_CM_TIMEWAIT; + wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); + + /* Check if the device started its remove_one */ + spin_lock_irqsave(&cm.lock, flags); + if (!cm_dev->going_down) + queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, + msecs_to_jiffies(wait_time)); + spin_unlock_irqrestore(&cm.lock, flags); + + cm_id_priv->timewait_info = NULL; +} + +static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) +{ + unsigned long flags; + + cm_id_priv->id.state = IB_CM_IDLE; + if (cm_id_priv->timewait_info) { + spin_lock_irqsave(&cm.lock, flags); + cm_cleanup_timewait(cm_id_priv->timewait_info); + spin_unlock_irqrestore(&cm.lock, flags); + kfree(cm_id_priv->timewait_info); + cm_id_priv->timewait_info = NULL; + } +} + +static void cm_destroy_id(struct ib_cm_id *cm_id, int err) +{ + struct cm_id_private *cm_id_priv; + struct cm_work *work; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); +retest: + spin_lock_irq(&cm_id_priv->lock); + switch (cm_id->state) { + case IB_CM_LISTEN: + spin_unlock_irq(&cm_id_priv->lock); + + spin_lock_irq(&cm.lock); + if (--cm_id_priv->listen_sharecount > 0) { + /* The id is still shared. 
*/ + cm_deref_id(cm_id_priv); + spin_unlock_irq(&cm.lock); + return; + } + rb_erase(&cm_id_priv->service_node, &cm.listen_service_table); + spin_unlock_irq(&cm.lock); + break; + case IB_CM_SIDR_REQ_SENT: + cm_id->state = IB_CM_IDLE; + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + spin_unlock_irq(&cm_id_priv->lock); + break; + case IB_CM_SIDR_REQ_RCVD: + spin_unlock_irq(&cm_id_priv->lock); + cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); + spin_lock_irq(&cm.lock); + if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) + rb_erase(&cm_id_priv->sidr_id_node, + &cm.remote_sidr_table); + spin_unlock_irq(&cm.lock); + break; + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + spin_unlock_irq(&cm_id_priv->lock); + ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, + &cm_id_priv->id.device->node_guid, + sizeof cm_id_priv->id.device->node_guid, + NULL, 0); + break; + case IB_CM_REQ_RCVD: + if (err == -ENOMEM) { + /* Do not reject to allow future retries. */ + cm_reset_to_idle(cm_id_priv); + spin_unlock_irq(&cm_id_priv->lock); + } else { + spin_unlock_irq(&cm_id_priv->lock); + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + } + break; + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + /* Fall through */ + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + spin_unlock_irq(&cm_id_priv->lock); + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + break; + case IB_CM_ESTABLISHED: + spin_unlock_irq(&cm_id_priv->lock); + if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) + break; + ib_send_cm_dreq(cm_id, NULL, 0); + goto retest; + case IB_CM_DREQ_SENT: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + cm_enter_timewait(cm_id_priv); + spin_unlock_irq(&cm_id_priv->lock); + break; + case IB_CM_DREQ_RCVD: + spin_unlock_irq(&cm_id_priv->lock); + ib_send_cm_drep(cm_id, NULL, 0); + break; + default: + spin_unlock_irq(&cm_id_priv->lock); + break; + } + + spin_lock_irq(&cm.lock); + if (!list_empty(&cm_id_priv->altr_list) && + (!cm_id_priv->altr_send_port_not_ready)) + list_del(&cm_id_priv->altr_list); + if (!list_empty(&cm_id_priv->prim_list) && + (!cm_id_priv->prim_send_port_not_ready)) + list_del(&cm_id_priv->prim_list); + spin_unlock_irq(&cm.lock); + + cm_free_id(cm_id->local_id); + cm_deref_id(cm_id_priv); + wait_for_completion(&cm_id_priv->comp); + while ((work = cm_dequeue_work(cm_id_priv)) != NULL) + cm_free_work(work); + kfree(cm_id_priv->private_data); + kfree(cm_id_priv); +} + +void ib_destroy_cm_id(struct ib_cm_id *cm_id) +{ + cm_destroy_id(cm_id, 0); +} +EXPORT_SYMBOL(ib_destroy_cm_id); + +/** + * __ib_cm_listen - Initiates listening on the specified service ID for + * connection and service ID resolution requests. + * @cm_id: Connection identifier associated with the listen request. + * @service_id: Service identifier matched against incoming connection + * and service ID resolution requests. The service ID should be specified + * in network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will + * assign a service ID to the caller. + * @service_mask: Mask applied to service ID used to listen across a + * range of service IDs. If set to 0, the service ID is matched + * exactly. This parameter is ignored if %service_id is set to + * IB_CM_ASSIGN_SERVICE_ID.
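+ *
+ * For example, a @service_mask of ~cpu_to_be64(0xff) listens across the
+ * 256 service IDs that share the upper 56 bits of @service_id.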
+ */ +static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, + __be64 service_mask) +{ + struct cm_id_private *cm_id_priv, *cur_cm_id_priv; + int ret = 0; + + service_mask = service_mask ? service_mask : ~cpu_to_be64(0); + service_id &= service_mask; + if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && + (service_id != IB_CM_ASSIGN_SERVICE_ID)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + if (cm_id->state != IB_CM_IDLE) + return -EINVAL; + + cm_id->state = IB_CM_LISTEN; + ++cm_id_priv->listen_sharecount; + + if (service_id == IB_CM_ASSIGN_SERVICE_ID) { + cm_id->service_id = cpu_to_be64(cm.listen_service_id++); + cm_id->service_mask = ~cpu_to_be64(0); + } else { + cm_id->service_id = service_id; + cm_id->service_mask = service_mask; + } + cur_cm_id_priv = cm_insert_listen(cm_id_priv); + + if (cur_cm_id_priv) { + cm_id->state = IB_CM_IDLE; + --cm_id_priv->listen_sharecount; + ret = -EBUSY; + } + return ret; +} + +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm.lock, flags); + ret = __ib_cm_listen(cm_id, service_id, service_mask); + spin_unlock_irqrestore(&cm.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_cm_listen); + +/** + * Create a new listening ib_cm_id and listen on the given service ID. + * + * If there's an existing ID listening on that same device and service ID, + * return it. + * + * @device: Device associated with the cm_id. All related communication will + * be associated with the specified device. + * @cm_handler: Callback invoked to notify the user of CM events. + * @service_id: Service identifier matched against incoming connection + * and service ID resolution requests. The service ID should be specified + * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will + * assign a service ID to the caller. + * + * Callers should call ib_destroy_cm_id when done with the listener ID. 
+ */ +struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, + ib_cm_handler cm_handler, + __be64 service_id) +{ + struct cm_id_private *cm_id_priv; + struct ib_cm_id *cm_id; + unsigned long flags; + int err = 0; + + /* Create an ID in advance, since the creation may sleep */ + cm_id = ib_create_cm_id(device, cm_handler, NULL); + if (IS_ERR(cm_id)) + return cm_id; + + spin_lock_irqsave(&cm.lock, flags); + + if (service_id == IB_CM_ASSIGN_SERVICE_ID) + goto new_id; + + /* Find an existing ID */ + cm_id_priv = cm_find_listen(device, service_id); + if (cm_id_priv) { + if (cm_id->cm_handler != cm_handler || cm_id->context) { + /* Sharing an ib_cm_id with different handlers is not + * supported */ + spin_unlock_irqrestore(&cm.lock, flags); + return ERR_PTR(-EINVAL); + } + atomic_inc(&cm_id_priv->refcount); + ++cm_id_priv->listen_sharecount; + spin_unlock_irqrestore(&cm.lock, flags); + + ib_destroy_cm_id(cm_id); + cm_id = &cm_id_priv->id; + return cm_id; + } + +new_id: + /* Use newly created ID */ + err = __ib_cm_listen(cm_id, service_id, 0); + + spin_unlock_irqrestore(&cm.lock, flags); + + if (err) { + ib_destroy_cm_id(cm_id); + return ERR_PTR(err); + } + return cm_id; +} +EXPORT_SYMBOL(ib_cm_insert_listen); + +static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, + enum cm_msg_sequence msg_seq) +{ + u64 hi_tid, low_tid; + + hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; + low_tid = (u64) ((__force u32)cm_id_priv->id.local_id | + (msg_seq << 30)); + return cpu_to_be64(hi_tid | low_tid); +} + +static void cm_format_mad_hdr(struct ib_mad_hdr *hdr, + __be16 attr_id, __be64 tid) +{ + hdr->base_version = IB_MGMT_BASE_VERSION; + hdr->mgmt_class = IB_MGMT_CLASS_CM; + hdr->class_version = IB_CM_CLASS_VERSION; + hdr->method = IB_MGMT_METHOD_SEND; + hdr->attr_id = attr_id; + hdr->tid = tid; +} + +static void cm_format_req(struct cm_req_msg *req_msg, + struct cm_id_private *cm_id_priv, + struct ib_cm_req_param *param) +{ + struct ib_sa_path_rec *pri_path = param->primary_path; + struct ib_sa_path_rec *alt_path = param->alternate_path; + + cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, + cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); + + req_msg->local_comm_id = cm_id_priv->id.local_id; + req_msg->service_id = param->service_id; + req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; + cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); + cm_req_set_init_depth(req_msg, param->initiator_depth); + cm_req_set_remote_resp_timeout(req_msg, + param->remote_cm_response_timeout); + cm_req_set_qp_type(req_msg, param->qp_type); + cm_req_set_flow_ctrl(req_msg, param->flow_control); + cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); + cm_req_set_local_resp_timeout(req_msg, + param->local_cm_response_timeout); + req_msg->pkey = param->primary_path->pkey; + cm_req_set_path_mtu(req_msg, param->primary_path->mtu); + cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); + + if (param->qp_type != IB_QPT_XRC_INI) { + cm_req_set_resp_res(req_msg, param->responder_resources); + cm_req_set_retry_count(req_msg, param->retry_count); + cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); + cm_req_set_srq(req_msg, param->srq); + } + + if (pri_path->hop_limit <= 1) { + req_msg->primary_local_lid = pri_path->slid; + req_msg->primary_remote_lid = pri_path->dlid; + } else { + /* Work-around until there's a way to obtain remote LID info */ + req_msg->primary_local_lid = IB_LID_PERMISSIVE; + req_msg->primary_remote_lid = IB_LID_PERMISSIVE; + } + 
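+ /* The local ack timeouts set below are in IBTA log2 units (timeout = 4.096us * 2^v): cm_ack_timeout() approximates log2(2^ca_ack_delay + 2^(packet_life_time + 1)) by taking the larger exponent and adding one when the two are within one of each other, e.g. cm_ack_timeout(16, 14) = 17; the result is clamped to the 5-bit maximum of 31. */ +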
req_msg->primary_local_gid = pri_path->sgid; + req_msg->primary_remote_gid = pri_path->dgid; + cm_req_set_primary_flow_label(req_msg, pri_path->flow_label); + cm_req_set_primary_packet_rate(req_msg, pri_path->rate); + req_msg->primary_traffic_class = pri_path->traffic_class; + req_msg->primary_hop_limit = pri_path->hop_limit; + cm_req_set_primary_sl(req_msg, pri_path->sl); + cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1)); + cm_req_set_primary_local_ack_timeout(req_msg, + cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, + pri_path->packet_life_time)); + + if (alt_path) { + if (alt_path->hop_limit <= 1) { + req_msg->alt_local_lid = alt_path->slid; + req_msg->alt_remote_lid = alt_path->dlid; + } else { + req_msg->alt_local_lid = IB_LID_PERMISSIVE; + req_msg->alt_remote_lid = IB_LID_PERMISSIVE; + } + req_msg->alt_local_gid = alt_path->sgid; + req_msg->alt_remote_gid = alt_path->dgid; + cm_req_set_alt_flow_label(req_msg, + alt_path->flow_label); + cm_req_set_alt_packet_rate(req_msg, alt_path->rate); + req_msg->alt_traffic_class = alt_path->traffic_class; + req_msg->alt_hop_limit = alt_path->hop_limit; + cm_req_set_alt_sl(req_msg, alt_path->sl); + cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1)); + cm_req_set_alt_local_ack_timeout(req_msg, + cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, + alt_path->packet_life_time)); + } + + if (param->private_data && param->private_data_len) + memcpy(req_msg->private_data, param->private_data, + param->private_data_len); +} + +static int cm_validate_req_param(struct ib_cm_req_param *param) +{ + /* peer-to-peer not supported */ + if (param->peer_to_peer) + return -EINVAL; + + if (!param->primary_path) + return -EINVAL; + + if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC && + param->qp_type != IB_QPT_XRC_INI) + return -EINVAL; + + if (param->private_data && + param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE) + return -EINVAL; + + if (param->alternate_path && + (param->alternate_path->pkey != param->primary_path->pkey || + param->alternate_path->mtu != param->primary_path->mtu)) + return -EINVAL; + + return 0; +} + +int ib_send_cm_req(struct ib_cm_id *cm_id, + struct ib_cm_req_param *param) +{ + struct cm_id_private *cm_id_priv; + struct cm_req_msg *req_msg; + unsigned long flags; + int ret; + + ret = cm_validate_req_param(param); + if (ret) + return ret; + + /* Verify that we're not in timewait. 
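A previous connection leaves the cm_id in IB_CM_TIMEWAIT until cm_timewait_handler() resets it to IB_CM_IDLE, so the state check below also catches a REQ issued while timewait is still draining.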
*/ + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_IDLE) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = -EINVAL; + goto out; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> + id.local_id); + if (IS_ERR(cm_id_priv->timewait_info)) { + ret = PTR_ERR(cm_id_priv->timewait_info); + goto out; + } + + ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, + cm_id_priv); + if (ret) + goto error1; + if (param->alternate_path) { + ret = cm_init_av_by_path(param->alternate_path, + &cm_id_priv->alt_av, cm_id_priv); + if (ret) + goto error1; + } + cm_id->service_id = param->service_id; + cm_id->service_mask = ~cpu_to_be64(0); + cm_id_priv->timeout_ms = cm_convert_to_ms( + param->primary_path->packet_life_time) * 2 + + cm_convert_to_ms( + param->remote_cm_response_timeout); + cm_id_priv->max_cm_retries = param->max_cm_retries; + cm_id_priv->initiator_depth = param->initiator_depth; + cm_id_priv->responder_resources = param->responder_resources; + cm_id_priv->retry_count = param->retry_count; + cm_id_priv->path_mtu = param->primary_path->mtu; + cm_id_priv->pkey = param->primary_path->pkey; + cm_id_priv->qp_type = param->qp_type; + + ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); + if (ret) + goto error1; + + req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; + cm_format_req(req_msg, cm_id_priv, param); + cm_id_priv->tid = req_msg->hdr.tid; + cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; + cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; + + cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); + cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + ret = ib_post_send_mad(cm_id_priv->msg, NULL); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto error2; + } + BUG_ON(cm_id->state != IB_CM_IDLE); + cm_id->state = IB_CM_REQ_SENT; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; + +error2: cm_free_msg(cm_id_priv->msg); +error1: kfree(cm_id_priv->timewait_info); +out: return ret; +} +EXPORT_SYMBOL(ib_send_cm_req); + +static int cm_issue_rej(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc, + enum ib_cm_rej_reason reason, + enum cm_msg_response msg_rejected, + void *ari, u8 ari_length) +{ + struct ib_mad_send_buf *msg = NULL; + struct cm_rej_msg *rej_msg, *rcv_msg; + int ret; + + ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); + if (ret) + return ret; + + /* We just need common CM header information. Cast to any message. 
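The comm ID fields sit at the same offsets in the REQ and REP messages rejected through this path, so reading them via struct cm_rej_msg is safe.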
*/ + rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad; + rej_msg = (struct cm_rej_msg *) msg->mad; + + cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid); + rej_msg->remote_comm_id = rcv_msg->local_comm_id; + rej_msg->local_comm_id = rcv_msg->remote_comm_id; + cm_rej_set_msg_rejected(rej_msg, msg_rejected); + rej_msg->reason = cpu_to_be16(reason); + + if (ari && ari_length) { + cm_rej_set_reject_info_len(rej_msg, ari_length); + memcpy(rej_msg->ari, ari, ari_length); + } + + ret = ib_post_send_mad(msg, NULL); + if (ret) + cm_free_msg(msg); + + return ret; +} + +static void cm_format_paths_from_req(struct cm_req_msg *req_msg, + struct ib_sa_path_rec *primary_path, + struct ib_sa_path_rec *alt_path) +{ + memset(primary_path, 0, sizeof *primary_path); + primary_path->dgid = req_msg->primary_local_gid; + primary_path->sgid = req_msg->primary_remote_gid; + primary_path->dlid = req_msg->primary_local_lid; + primary_path->slid = req_msg->primary_remote_lid; + primary_path->flow_label = cm_req_get_primary_flow_label(req_msg); + primary_path->hop_limit = req_msg->primary_hop_limit; + primary_path->traffic_class = req_msg->primary_traffic_class; + primary_path->reversible = 1; + primary_path->pkey = req_msg->pkey; + primary_path->sl = cm_req_get_primary_sl(req_msg); + primary_path->mtu_selector = IB_SA_EQ; + primary_path->mtu = cm_req_get_path_mtu(req_msg); + primary_path->rate_selector = IB_SA_EQ; + primary_path->rate = cm_req_get_primary_packet_rate(req_msg); + primary_path->packet_life_time_selector = IB_SA_EQ; + primary_path->packet_life_time = + cm_req_get_primary_local_ack_timeout(req_msg); + primary_path->packet_life_time -= (primary_path->packet_life_time > 0); + primary_path->service_id = req_msg->service_id; + + if (req_msg->alt_local_lid) { + memset(alt_path, 0, sizeof *alt_path); + alt_path->dgid = req_msg->alt_local_gid; + alt_path->sgid = req_msg->alt_remote_gid; + alt_path->dlid = req_msg->alt_local_lid; + alt_path->slid = req_msg->alt_remote_lid; + alt_path->flow_label = cm_req_get_alt_flow_label(req_msg); + alt_path->hop_limit = req_msg->alt_hop_limit; + alt_path->traffic_class = req_msg->alt_traffic_class; + alt_path->reversible = 1; + alt_path->pkey = req_msg->pkey; + alt_path->sl = cm_req_get_alt_sl(req_msg); + alt_path->mtu_selector = IB_SA_EQ; + alt_path->mtu = cm_req_get_path_mtu(req_msg); + alt_path->rate_selector = IB_SA_EQ; + alt_path->rate = cm_req_get_alt_packet_rate(req_msg); + alt_path->packet_life_time_selector = IB_SA_EQ; + alt_path->packet_life_time = + cm_req_get_alt_local_ack_timeout(req_msg); + alt_path->packet_life_time -= (alt_path->packet_life_time > 0); + alt_path->service_id = req_msg->service_id; + } +} + +static u16 cm_get_bth_pkey(struct cm_work *work) +{ + struct ib_device *ib_dev = work->port->cm_dev->ib_device; + u8 port_num = work->port->port_num; + u16 pkey_index = work->mad_recv_wc->wc->pkey_index; + u16 pkey; + int ret; + + ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey); + if (ret) { + dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). 
%d\n", + port_num, pkey_index, ret); + return 0; + } + + return pkey; +} + +static void cm_format_req_event(struct cm_work *work, + struct cm_id_private *cm_id_priv, + struct ib_cm_id *listen_id) +{ + struct cm_req_msg *req_msg; + struct ib_cm_req_event_param *param; + + req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.req_rcvd; + param->listen_id = listen_id; + param->bth_pkey = cm_get_bth_pkey(work); + param->port = cm_id_priv->av.port->port_num; + param->primary_path = &work->path[0]; + if (req_msg->alt_local_lid) + param->alternate_path = &work->path[1]; + else + param->alternate_path = NULL; + param->remote_ca_guid = req_msg->local_ca_guid; + param->remote_qkey = be32_to_cpu(req_msg->local_qkey); + param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg)); + param->qp_type = cm_req_get_qp_type(req_msg); + param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg)); + param->responder_resources = cm_req_get_init_depth(req_msg); + param->initiator_depth = cm_req_get_resp_res(req_msg); + param->local_cm_response_timeout = + cm_req_get_remote_resp_timeout(req_msg); + param->flow_control = cm_req_get_flow_ctrl(req_msg); + param->remote_cm_response_timeout = + cm_req_get_local_resp_timeout(req_msg); + param->retry_count = cm_req_get_retry_count(req_msg); + param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); + param->srq = cm_req_get_srq(req_msg); + work->cm_event.private_data = &req_msg->private_data; +} + +static void cm_process_work(struct cm_id_private *cm_id_priv, + struct cm_work *work) +{ + int ret; + + /* We will typically only have the current event to report. */ + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); + cm_free_work(work); + + while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) { + spin_lock_irq(&cm_id_priv->lock); + work = cm_dequeue_work(cm_id_priv); + spin_unlock_irq(&cm_id_priv->lock); + BUG_ON(!work); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, + &work->cm_event); + cm_free_work(work); + } + cm_deref_id(cm_id_priv); + if (ret) + cm_destroy_id(&cm_id_priv->id, ret); +} + +static void cm_format_mra(struct cm_mra_msg *mra_msg, + struct cm_id_private *cm_id_priv, + enum cm_msg_response msg_mraed, u8 service_timeout, + const void *private_data, u8 private_data_len) +{ + cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid); + cm_mra_set_msg_mraed(mra_msg, msg_mraed); + mra_msg->local_comm_id = cm_id_priv->id.local_id; + mra_msg->remote_comm_id = cm_id_priv->id.remote_id; + cm_mra_set_service_timeout(mra_msg, service_timeout); + + if (private_data && private_data_len) + memcpy(mra_msg->private_data, private_data, private_data_len); +} + +static void cm_format_rej(struct cm_rej_msg *rej_msg, + struct cm_id_private *cm_id_priv, + enum ib_cm_rej_reason reason, + void *ari, + u8 ari_length, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid); + rej_msg->remote_comm_id = cm_id_priv->id.remote_id; + + switch(cm_id_priv->id.state) { + case IB_CM_REQ_RCVD: + rej_msg->local_comm_id = 0; + cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); + break; + case IB_CM_MRA_REQ_SENT: + rej_msg->local_comm_id = cm_id_priv->id.local_id; + cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); + break; + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + rej_msg->local_comm_id = cm_id_priv->id.local_id; + cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP); + break; + default: + rej_msg->local_comm_id 
= cm_id_priv->id.local_id; + cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER); + break; + } + + rej_msg->reason = cpu_to_be16(reason); + if (ari && ari_length) { + cm_rej_set_reject_info_len(rej_msg, ari_length); + memcpy(rej_msg->ari, ari, ari_length); + } + + if (private_data && private_data_len) + memcpy(rej_msg->private_data, private_data, private_data_len); +} + +static void cm_dup_req_handler(struct cm_work *work, + struct cm_id_private *cm_id_priv) +{ + struct ib_mad_send_buf *msg = NULL; + int ret; + + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_REQ_COUNTER]); + + /* Quick state check to discard duplicate REQs. */ + if (cm_id_priv->id.state == IB_CM_REQ_RCVD) + return; + + ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); + if (ret) + return; + + spin_lock_irq(&cm_id_priv->lock); + switch (cm_id_priv->id.state) { + case IB_CM_MRA_REQ_SENT: + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + break; + case IB_CM_TIMEWAIT: + cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, + IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0); + break; + default: + goto unlock; + } + spin_unlock_irq(&cm_id_priv->lock); + + ret = ib_post_send_mad(msg, NULL); + if (ret) + goto free; + return; + +unlock: spin_unlock_irq(&cm_id_priv->lock); +free: cm_free_msg(msg); +} + +static struct cm_id_private * cm_match_req(struct cm_work *work, + struct cm_id_private *cm_id_priv) +{ + struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; + struct cm_timewait_info *timewait_info; + struct cm_req_msg *req_msg; + + req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; + + /* Check for possible duplicate REQ. */ + spin_lock_irq(&cm.lock); + timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); + if (timewait_info) { + cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, + timewait_info->work.remote_id); + spin_unlock_irq(&cm.lock); + if (cur_cm_id_priv) { + cm_dup_req_handler(work, cur_cm_id_priv); + cm_deref_id(cur_cm_id_priv); + } + return NULL; + } + + /* Check for stale connections. */ + timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); + if (timewait_info) { + cm_cleanup_timewait(cm_id_priv->timewait_info); + spin_unlock_irq(&cm.lock); + cm_issue_rej(work->port, work->mad_recv_wc, + IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, + NULL, 0); + return NULL; + } + + /* Find matching listen request. */ + listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, + req_msg->service_id); + if (!listen_cm_id_priv) { + cm_cleanup_timewait(cm_id_priv->timewait_info); + spin_unlock_irq(&cm.lock); + cm_issue_rej(work->port, work->mad_recv_wc, + IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, + NULL, 0); + goto out; + } + atomic_inc(&listen_cm_id_priv->refcount); + atomic_inc(&cm_id_priv->refcount); + cm_id_priv->id.state = IB_CM_REQ_RCVD; + atomic_inc(&cm_id_priv->work_count); + spin_unlock_irq(&cm.lock); +out: + return listen_cm_id_priv; +} + +/* + * Work-around for inter-subnet connections. If the LIDs are permissive, + * we need to override the LID/SL data in the REQ with the LID information + * in the work completion. 
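+ * For example, a REQ arriving from another subnet carries IB_LID_PERMISSIVE as its primary local LID; it is overwritten with wc->slid, and the SL is taken from the work completion as well.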
+ */ +static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) +{ + if (!cm_req_get_primary_subnet_local(req_msg)) { + if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { + req_msg->primary_local_lid = cpu_to_be16(wc->slid); + cm_req_set_primary_sl(req_msg, wc->sl); + } + + if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE) + req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits); + } + + if (!cm_req_get_alt_subnet_local(req_msg)) { + if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { + req_msg->alt_local_lid = cpu_to_be16(wc->slid); + cm_req_set_alt_sl(req_msg, wc->sl); + } + + if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE) + req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits); + } +} + +static int cm_req_handler(struct cm_work *work) +{ + struct ib_cm_id *cm_id; + struct cm_id_private *cm_id_priv, *listen_cm_id_priv; + struct cm_req_msg *req_msg; + union ib_gid gid; + struct ib_gid_attr gid_attr; + int ret; + + req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; + + cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + cm_id_priv->id.remote_id = req_msg->local_comm_id; + cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, + &cm_id_priv->av); + cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> + id.local_id); + if (IS_ERR(cm_id_priv->timewait_info)) { + ret = PTR_ERR(cm_id_priv->timewait_info); + goto destroy; + } + cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; + cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; + cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg); + + listen_cm_id_priv = cm_match_req(work, cm_id_priv); + if (!listen_cm_id_priv) { + ret = -EINVAL; + kfree(cm_id_priv->timewait_info); + goto destroy; + } + + cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; + cm_id_priv->id.context = listen_cm_id_priv->id.context; + cm_id_priv->id.service_id = req_msg->service_id; + cm_id_priv->id.service_mask = ~cpu_to_be64(0); + + cm_process_routed_req(req_msg, work->mad_recv_wc->wc); + cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); + + memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); + work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit; + ret = ib_get_cached_gid(work->port->cm_dev->ib_device, + work->port->port_num, + cm_id_priv->av.ah_attr.grh.sgid_index, + &gid, &gid_attr); + if (!ret) { + if (gid_attr.ndev) { + work->path[0].ifindex = gid_attr.ndev->if_index; + work->path[0].net = dev_net(gid_attr.ndev); + dev_put(gid_attr.ndev); + } + work->path[0].gid_type = gid_attr.gid_type; + ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, + cm_id_priv); + } + if (ret) { + int err = ib_get_cached_gid(work->port->cm_dev->ib_device, + work->port->port_num, 0, + &work->path[0].sgid, + &gid_attr); + if (!err && gid_attr.ndev) { + work->path[0].ifindex = gid_attr.ndev->if_index; + work->path[0].net = dev_net(gid_attr.ndev); + dev_put(gid_attr.ndev); + } + work->path[0].gid_type = gid_attr.gid_type; + ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, + &work->path[0].sgid, sizeof work->path[0].sgid, + NULL, 0); + goto rejected; + } + if (req_msg->alt_local_lid) { + ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, + cm_id_priv); + if (ret) { + ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, + &work->path[0].sgid, + sizeof 
work->path[0].sgid, NULL, 0); + goto rejected; + } + } + cm_id_priv->tid = req_msg->hdr.tid; + cm_id_priv->timeout_ms = cm_convert_to_ms( + cm_req_get_local_resp_timeout(req_msg)); + cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg); + cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg); + cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); + cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg); + cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg); + cm_id_priv->pkey = req_msg->pkey; + cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg); + cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); + cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); + cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); + + cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); + cm_process_work(cm_id_priv, work); + cm_deref_id(listen_cm_id_priv); + return 0; + +rejected: + atomic_dec(&cm_id_priv->refcount); + cm_deref_id(listen_cm_id_priv); +destroy: + ib_destroy_cm_id(cm_id); + return ret; +} + +static void cm_format_rep(struct cm_rep_msg *rep_msg, + struct cm_id_private *cm_id_priv, + struct ib_cm_rep_param *param) +{ + cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); + rep_msg->local_comm_id = cm_id_priv->id.local_id; + rep_msg->remote_comm_id = cm_id_priv->id.remote_id; + cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); + rep_msg->resp_resources = param->responder_resources; + cm_rep_set_target_ack_delay(rep_msg, + cm_id_priv->av.port->cm_dev->ack_delay); + cm_rep_set_failover(rep_msg, param->failover_accepted); + cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); + rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; + + if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) { + rep_msg->initiator_depth = param->initiator_depth; + cm_rep_set_flow_ctrl(rep_msg, param->flow_control); + cm_rep_set_srq(rep_msg, param->srq); + cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); + } else { + cm_rep_set_srq(rep_msg, 1); + cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num)); + } + + if (param->private_data && param->private_data_len) + memcpy(rep_msg->private_data, param->private_data, + param->private_data_len); +} + +int ib_send_cm_rep(struct ib_cm_id *cm_id, + struct ib_cm_rep_param *param) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + struct cm_rep_msg *rep_msg; + unsigned long flags; + int ret; + + if (param->private_data && + param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_REQ_RCVD && + cm_id->state != IB_CM_MRA_REQ_SENT) { + ret = -EINVAL; + goto out; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + rep_msg = (struct cm_rep_msg *) msg->mad; + cm_format_rep(rep_msg, cm_id_priv, param); + msg->timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; + + ret = ib_post_send_mad(msg, NULL); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + + cm_id->state = IB_CM_REP_SENT; + cm_id_priv->msg = msg; + cm_id_priv->initiator_depth = param->initiator_depth; + cm_id_priv->responder_resources = param->responder_resources; + cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); + cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); + +out: spin_unlock_irqrestore(&cm_id_priv->lock, 
flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_rep); + +static void cm_format_rtu(struct cm_rtu_msg *rtu_msg, + struct cm_id_private *cm_id_priv, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid); + rtu_msg->local_comm_id = cm_id_priv->id.local_id; + rtu_msg->remote_comm_id = cm_id_priv->id.remote_id; + + if (private_data && private_data_len) + memcpy(rtu_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_rtu(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + unsigned long flags; + void *data; + int ret; + + if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE) + return -EINVAL; + + data = cm_copy_private_data(private_data, private_data_len); + if (IS_ERR(data)) + return PTR_ERR(data); + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_REP_RCVD && + cm_id->state != IB_CM_MRA_REP_SENT) { + ret = -EINVAL; + goto error; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error; + + cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, + private_data, private_data_len); + + ret = ib_post_send_mad(msg, NULL); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + kfree(data); + return ret; + } + + cm_id->state = IB_CM_ESTABLISHED; + cm_set_private_data(cm_id_priv, data, private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; + +error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_rtu); + +static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type) +{ + struct cm_rep_msg *rep_msg; + struct ib_cm_rep_event_param *param; + + rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.rep_rcvd; + param->remote_ca_guid = rep_msg->local_ca_guid; + param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); + param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type)); + param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); + param->responder_resources = rep_msg->initiator_depth; + param->initiator_depth = rep_msg->resp_resources; + param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); + param->failover_accepted = cm_rep_get_failover(rep_msg); + param->flow_control = cm_rep_get_flow_ctrl(rep_msg); + param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); + param->srq = cm_rep_get_srq(rep_msg); + work->cm_event.private_data = &rep_msg->private_data; +} + +static void cm_dup_rep_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_rep_msg *rep_msg; + struct ib_mad_send_buf *msg = NULL; + int ret; + + rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, + rep_msg->local_comm_id); + if (!cm_id_priv) + return; + + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
+ counter[CM_REP_COUNTER]); + ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); + if (ret) + goto deref; + + spin_lock_irq(&cm_id_priv->lock); + if (cm_id_priv->id.state == IB_CM_ESTABLISHED) + cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT) + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + else + goto unlock; + spin_unlock_irq(&cm_id_priv->lock); + + ret = ib_post_send_mad(msg, NULL); + if (ret) + goto free; + goto deref; + +unlock: spin_unlock_irq(&cm_id_priv->lock); +free: cm_free_msg(msg); +deref: cm_deref_id(cm_id_priv); +} + +static int cm_rep_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_rep_msg *rep_msg; + int ret; + + rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); + if (!cm_id_priv) { + cm_dup_rep_handler(work); + return -EINVAL; + } + + cm_format_rep_event(work, cm_id_priv->qp_type); + + spin_lock_irq(&cm_id_priv->lock); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + break; + default: + spin_unlock_irq(&cm_id_priv->lock); + ret = -EINVAL; + goto error; + } + + cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; + cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; + cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); + + spin_lock(&cm.lock); + /* Check for duplicate REP. */ + if (cm_insert_remote_id(cm_id_priv->timewait_info)) { + spin_unlock(&cm.lock); + spin_unlock_irq(&cm_id_priv->lock); + ret = -EINVAL; + goto error; + } + /* Check for a stale connection. 
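If the remote QPN from this REP is already bound in the remote QP table, the REP belongs to a stale connection and is answered with IB_CM_REJ_STALE_CONN.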
*/ + if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { + rb_erase(&cm_id_priv->timewait_info->remote_id_node, + &cm.remote_id_table); + cm_id_priv->timewait_info->inserted_remote_id = 0; + spin_unlock(&cm.lock); + spin_unlock_irq(&cm_id_priv->lock); + cm_issue_rej(work->port, work->mad_recv_wc, + IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, + NULL, 0); + ret = -EINVAL; + goto error; + } + spin_unlock(&cm.lock); + + cm_id_priv->id.state = IB_CM_REP_RCVD; + cm_id_priv->id.remote_id = rep_msg->local_comm_id; + cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); + cm_id_priv->initiator_depth = rep_msg->resp_resources; + cm_id_priv->responder_resources = rep_msg->initiator_depth; + cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); + cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); + cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); + cm_id_priv->av.timeout = + cm_ack_timeout(cm_id_priv->target_ack_delay, + cm_id_priv->av.timeout - 1); + cm_id_priv->alt_av.timeout = + cm_ack_timeout(cm_id_priv->target_ack_delay, + cm_id_priv->alt_av.timeout - 1); + + /* todo: handle peer_to_peer */ + + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irq(&cm_id_priv->lock); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; + +error: + cm_deref_id(cm_id_priv); + return ret; +} + +static int cm_establish_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + int ret; + + /* See comment in cm_establish about lookup. */ + cm_id_priv = cm_acquire_id(work->local_id, work->remote_id); + if (!cm_id_priv) + return -EINVAL; + + spin_lock_irq(&cm_id_priv->lock); + if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { + spin_unlock_irq(&cm_id_priv->lock); + goto out; + } + + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irq(&cm_id_priv->lock); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static int cm_rtu_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_rtu_msg *rtu_msg; + int ret; + + rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id, + rtu_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; + + work->cm_event.private_data = &rtu_msg->private_data; + + spin_lock_irq(&cm_id_priv->lock); + if (cm_id_priv->id.state != IB_CM_REP_SENT && + cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { + spin_unlock_irq(&cm_id_priv->lock); + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
+ counter[CM_RTU_COUNTER]); + goto out; + } + cm_id_priv->id.state = IB_CM_ESTABLISHED; + + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irq(&cm_id_priv->lock); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, + struct cm_id_private *cm_id_priv, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, + cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ)); + dreq_msg->local_comm_id = cm_id_priv->id.local_id; + dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; + cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); + + if (private_data && private_data_len) + memcpy(dreq_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_dreq(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + unsigned long flags; + int ret; + + if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_ESTABLISHED) { + ret = -EINVAL; + goto out; + } + + if (cm_id->lap_state == IB_CM_LAP_SENT || + cm_id->lap_state == IB_CM_MRA_LAP_RCVD) + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) { + cm_enter_timewait(cm_id_priv); + goto out; + } + + cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, + private_data, private_data_len); + msg->timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; + + ret = ib_post_send_mad(msg, NULL); + if (ret) { + cm_enter_timewait(cm_id_priv); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + + cm_id->state = IB_CM_DREQ_SENT; + cm_id_priv->msg = msg; +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_dreq); + +static void cm_format_drep(struct cm_drep_msg *drep_msg, + struct cm_id_private *cm_id_priv, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid); + drep_msg->local_comm_id = cm_id_priv->id.local_id; + drep_msg->remote_comm_id = cm_id_priv->id.remote_id; + + if (private_data && private_data_len) + memcpy(drep_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_drep(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + unsigned long flags; + void *data; + int ret; + + if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE) + return -EINVAL; + + data = cm_copy_private_data(private_data, private_data_len); + if (IS_ERR(data)) + return PTR_ERR(data); + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_DREQ_RCVD) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); + return -EINVAL; + } + + cm_set_private_data(cm_id_priv, data, private_data_len); + cm_enter_timewait(cm_id_priv); + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + 
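/* The id entered timewait above, so even if building or posting the DREP fails here, the peer's DREQ retransmits are still answered from the IB_CM_TIMEWAIT case in cm_dreq_handler(). A minimal sketch of the full teardown, assuming an established id on each side: the initiator calls ib_send_cm_dreq(cm_id, NULL, 0); the peer handles IB_CM_DREQ_RECEIVED by calling ib_send_cm_drep(cm_id, NULL, 0). */ +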
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, + private_data, private_data_len); + + ret = ib_post_send_mad(msg, NULL); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_drep); + +static int cm_issue_drep(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_send_buf *msg = NULL; + struct cm_dreq_msg *dreq_msg; + struct cm_drep_msg *drep_msg; + int ret; + + ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); + if (ret) + return ret; + + dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad; + drep_msg = (struct cm_drep_msg *) msg->mad; + + cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid); + drep_msg->remote_comm_id = dreq_msg->local_comm_id; + drep_msg->local_comm_id = dreq_msg->remote_comm_id; + + ret = ib_post_send_mad(msg, NULL); + if (ret) + cm_free_msg(msg); + + return ret; +} + +static int cm_dreq_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_dreq_msg *dreq_msg; + struct ib_mad_send_buf *msg = NULL; + int ret; + + dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, + dreq_msg->local_comm_id); + if (!cm_id_priv) { + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_DREQ_COUNTER]); + cm_issue_drep(work->port, work->mad_recv_wc); + return -EINVAL; + } + + work->cm_event.private_data = &dreq_msg->private_data; + + spin_lock_irq(&cm_id_priv->lock); + if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg)) + goto unlock; + + switch (cm_id_priv->id.state) { + case IB_CM_REP_SENT: + case IB_CM_DREQ_SENT: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + break; + case IB_CM_ESTABLISHED: + if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || + cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + break; + case IB_CM_MRA_REP_RCVD: + break; + case IB_CM_TIMEWAIT: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_DREQ_COUNTER]); + if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) + goto unlock; + + cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + spin_unlock_irq(&cm_id_priv->lock); + + if (ib_post_send_mad(msg, NULL)) + cm_free_msg(msg); + goto deref; + case IB_CM_DREQ_RCVD: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
+ counter[CM_DREQ_COUNTER]); + goto unlock; + default: + goto unlock; + } + cm_id_priv->id.state = IB_CM_DREQ_RCVD; + cm_id_priv->tid = dreq_msg->hdr.tid; + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irq(&cm_id_priv->lock); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; + +unlock: spin_unlock_irq(&cm_id_priv->lock); +deref: cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static int cm_drep_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_drep_msg *drep_msg; + int ret; + + drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id, + drep_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; + + work->cm_event.private_data = &drep_msg->private_data; + + spin_lock_irq(&cm_id_priv->lock); + if (cm_id_priv->id.state != IB_CM_DREQ_SENT && + cm_id_priv->id.state != IB_CM_DREQ_RCVD) { + spin_unlock_irq(&cm_id_priv->lock); + goto out; + } + cm_enter_timewait(cm_id_priv); + + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irq(&cm_id_priv->lock); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +int ib_send_cm_rej(struct ib_cm_id *cm_id, + enum ib_cm_rej_reason reason, + void *ari, + u8 ari_length, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + unsigned long flags; + int ret; + + if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) || + (ari && ari_length > IB_CM_REJ_ARI_LENGTH)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id->state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + ret = cm_alloc_msg(cm_id_priv, &msg); + if (!ret) + cm_format_rej((struct cm_rej_msg *) msg->mad, + cm_id_priv, reason, ari, ari_length, + private_data, private_data_len); + + cm_reset_to_idle(cm_id_priv); + break; + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + ret = cm_alloc_msg(cm_id_priv, &msg); + if (!ret) + cm_format_rej((struct cm_rej_msg *) msg->mad, + cm_id_priv, reason, ari, ari_length, + private_data, private_data_len); + + cm_enter_timewait(cm_id_priv); + break; + default: + ret = -EINVAL; + goto out; + } + + if (ret) + goto out; + + ret = ib_post_send_mad(msg, NULL); + if (ret) + cm_free_msg(msg); + +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_rej); + +static void cm_format_rej_event(struct cm_work *work) +{ + struct cm_rej_msg *rej_msg; + struct ib_cm_rej_event_param *param; + + rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.rej_rcvd; + param->ari = rej_msg->ari; + param->ari_length = cm_rej_get_reject_info_len(rej_msg); + param->reason = __be16_to_cpu(rej_msg->reason); + work->cm_event.private_data = &rej_msg->private_data; +} + +static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) +{ + struct cm_timewait_info *timewait_info; + struct cm_id_private *cm_id_priv; + __be32 remote_id; + + remote_id = 
rej_msg->local_comm_id; + + if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) { + spin_lock_irq(&cm.lock); + timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari), + remote_id); + if (!timewait_info) { + spin_unlock_irq(&cm.lock); + return NULL; + } + cm_id_priv = idr_find(&cm.local_id_table, (__force int) + (timewait_info->work.local_id ^ + cm.random_id_operand)); + if (cm_id_priv) { + if (cm_id_priv->id.remote_id == remote_id) + atomic_inc(&cm_id_priv->refcount); + else + cm_id_priv = NULL; + } + spin_unlock_irq(&cm.lock); + } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ) + cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0); + else + cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id); + + return cm_id_priv; +} + +static int cm_rej_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_rej_msg *rej_msg; + int ret; + + rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_rejected_id(rej_msg); + if (!cm_id_priv) + return -EINVAL; + + cm_format_rej_event(work); + + spin_lock_irq(&cm_id_priv->lock); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + /* fall through */ + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN) + cm_enter_timewait(cm_id_priv); + else + cm_reset_to_idle(cm_id_priv); + break; + case IB_CM_DREQ_SENT: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + /* fall through */ + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + cm_enter_timewait(cm_id_priv); + break; + case IB_CM_ESTABLISHED: + if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT || + cm_id_priv->id.lap_state == IB_CM_LAP_SENT) { + if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT) + ib_cancel_mad(cm_id_priv->av.port->mad_agent, + cm_id_priv->msg); + cm_enter_timewait(cm_id_priv); + break; + } + /* fall through */ + default: + spin_unlock_irq(&cm_id_priv->lock); + ret = -EINVAL; + goto out; + } + + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irq(&cm_id_priv->lock); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +int ib_send_cm_mra(struct ib_cm_id *cm_id, + u8 service_timeout, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + enum ib_cm_state cm_state; + enum ib_cm_lap_state lap_state; + enum cm_msg_response msg_response; + void *data; + unsigned long flags; + int ret; + + if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE) + return -EINVAL; + + data = cm_copy_private_data(private_data, private_data_len); + if (IS_ERR(data)) + return PTR_ERR(data); + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch(cm_id_priv->id.state) { + case IB_CM_REQ_RCVD: + cm_state = IB_CM_MRA_REQ_SENT; + lap_state = cm_id->lap_state; + msg_response = CM_MSG_RESPONSE_REQ; + break; + case IB_CM_REP_RCVD: + cm_state = IB_CM_MRA_REP_SENT; + lap_state = cm_id->lap_state; + msg_response = CM_MSG_RESPONSE_REP; + break; + case IB_CM_ESTABLISHED: + if (cm_id->lap_state == IB_CM_LAP_RCVD) { + cm_state = cm_id->state; + lap_state = IB_CM_MRA_LAP_SENT; + msg_response = 
CM_MSG_RESPONSE_OTHER; + break; + } + default: + ret = -EINVAL; + goto error1; + } + + if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) { + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error1; + + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + msg_response, service_timeout, + private_data, private_data_len); + ret = ib_post_send_mad(msg, NULL); + if (ret) + goto error2; + } + + cm_id->state = cm_state; + cm_id->lap_state = lap_state; + cm_id_priv->service_timeout = service_timeout; + cm_set_private_data(cm_id_priv, data, private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; + +error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); + return ret; + +error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); + cm_free_msg(msg); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_mra); + +static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg) +{ + switch (cm_mra_get_msg_mraed(mra_msg)) { + case CM_MSG_RESPONSE_REQ: + return cm_acquire_id(mra_msg->remote_comm_id, 0); + case CM_MSG_RESPONSE_REP: + case CM_MSG_RESPONSE_OTHER: + return cm_acquire_id(mra_msg->remote_comm_id, + mra_msg->local_comm_id); + default: + return NULL; + } +} + +static int cm_mra_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_mra_msg *mra_msg; + int timeout, ret; + + mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_mraed_id(mra_msg); + if (!cm_id_priv) + return -EINVAL; + + work->cm_event.private_data = &mra_msg->private_data; + work->cm_event.param.mra_rcvd.service_timeout = + cm_mra_get_service_timeout(mra_msg); + timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + + cm_convert_to_ms(cm_id_priv->av.timeout); + + spin_lock_irq(&cm_id_priv->lock); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || + ib_modify_mad(cm_id_priv->av.port->mad_agent, + cm_id_priv->msg, timeout)) + goto out; + cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; + break; + case IB_CM_REP_SENT: + if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || + ib_modify_mad(cm_id_priv->av.port->mad_agent, + cm_id_priv->msg, timeout)) + goto out; + cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; + break; + case IB_CM_ESTABLISHED: + if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || + cm_id_priv->id.lap_state != IB_CM_LAP_SENT || + ib_modify_mad(cm_id_priv->av.port->mad_agent, + cm_id_priv->msg, timeout)) { + if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) + atomic_long_inc(&work->port-> + counter_group[CM_RECV_DUPLICATES]. + counter[CM_MRA_COUNTER]); + goto out; + } + cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; + break; + case IB_CM_MRA_REQ_RCVD: + case IB_CM_MRA_REP_RCVD: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
+ counter[CM_MRA_COUNTER]); + /* fall through */ + default: + goto out; + } + + cm_id_priv->msg->context[1] = (void *) (unsigned long) + cm_id_priv->id.state; + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irq(&cm_id_priv->lock); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + spin_unlock_irq(&cm_id_priv->lock); + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_format_lap(struct cm_lap_msg *lap_msg, + struct cm_id_private *cm_id_priv, + struct ib_sa_path_rec *alternate_path, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, + cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); + lap_msg->local_comm_id = cm_id_priv->id.local_id; + lap_msg->remote_comm_id = cm_id_priv->id.remote_id; + cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); + /* todo: need remote CM response timeout */ + cm_lap_set_remote_resp_timeout(lap_msg, 0x1F); + lap_msg->alt_local_lid = alternate_path->slid; + lap_msg->alt_remote_lid = alternate_path->dlid; + lap_msg->alt_local_gid = alternate_path->sgid; + lap_msg->alt_remote_gid = alternate_path->dgid; + cm_lap_set_flow_label(lap_msg, alternate_path->flow_label); + cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class); + lap_msg->alt_hop_limit = alternate_path->hop_limit; + cm_lap_set_packet_rate(lap_msg, alternate_path->rate); + cm_lap_set_sl(lap_msg, alternate_path->sl); + cm_lap_set_subnet_local(lap_msg, 1); /* local only... */ + cm_lap_set_local_ack_timeout(lap_msg, + cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, + alternate_path->packet_life_time)); + + if (private_data && private_data_len) + memcpy(lap_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_lap(struct ib_cm_id *cm_id, + struct ib_sa_path_rec *alternate_path, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + unsigned long flags; + int ret; + + if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_ESTABLISHED || + (cm_id->lap_state != IB_CM_LAP_UNINIT && + cm_id->lap_state != IB_CM_LAP_IDLE)) { + ret = -EINVAL; + goto out; + } + + ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, + cm_id_priv); + if (ret) + goto out; + cm_id_priv->alt_av.timeout = + cm_ack_timeout(cm_id_priv->target_ack_delay, + cm_id_priv->alt_av.timeout - 1); + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, + alternate_path, private_data, private_data_len); + msg->timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; + + ret = ib_post_send_mad(msg, NULL); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + + cm_id->lap_state = IB_CM_LAP_SENT; + cm_id_priv->msg = msg; + +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_lap); + +static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, + struct ib_sa_path_rec *path, + struct cm_lap_msg *lap_msg) +{ + memset(path, 0, sizeof *path); + path->dgid = lap_msg->alt_local_gid; + path->sgid = lap_msg->alt_remote_gid; + path->dlid = lap_msg->alt_local_lid; + 
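/* The GIDs and LIDs are deliberately swapped here: the path record is built from the LAP receiver's point of view, so the sender's alt_local_* names our destination and its alt_remote_* our source. */ +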
path->slid = lap_msg->alt_remote_lid; + path->flow_label = cm_lap_get_flow_label(lap_msg); + path->hop_limit = lap_msg->alt_hop_limit; + path->traffic_class = cm_lap_get_traffic_class(lap_msg); + path->reversible = 1; + path->pkey = cm_id_priv->pkey; + path->sl = cm_lap_get_sl(lap_msg); + path->mtu_selector = IB_SA_EQ; + path->mtu = cm_id_priv->path_mtu; + path->rate_selector = IB_SA_EQ; + path->rate = cm_lap_get_packet_rate(lap_msg); + path->packet_life_time_selector = IB_SA_EQ; + path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg); + path->packet_life_time -= (path->packet_life_time > 0); +} + +static int cm_lap_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_lap_msg *lap_msg; + struct ib_cm_lap_event_param *param; + struct ib_mad_send_buf *msg = NULL; + int ret; + + /* todo: verify LAP request and send reject APR if invalid. */ + lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id, + lap_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; + + param = &work->cm_event.param.lap_rcvd; + param->alternate_path = &work->path[0]; + cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); + work->cm_event.private_data = &lap_msg->private_data; + + spin_lock_irq(&cm_id_priv->lock); + if (cm_id_priv->id.state != IB_CM_ESTABLISHED) + goto unlock; + + switch (cm_id_priv->id.lap_state) { + case IB_CM_LAP_UNINIT: + case IB_CM_LAP_IDLE: + break; + case IB_CM_MRA_LAP_SENT: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_LAP_COUNTER]); + if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) + goto unlock; + + cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, + CM_MSG_RESPONSE_OTHER, + cm_id_priv->service_timeout, + cm_id_priv->private_data, + cm_id_priv->private_data_len); + spin_unlock_irq(&cm_id_priv->lock); + + if (ib_post_send_mad(msg, NULL)) + cm_free_msg(msg); + goto deref; + case IB_CM_LAP_RCVD: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
+ counter[CM_LAP_COUNTER]); + goto unlock; + default: + goto unlock; + } + + cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; + cm_id_priv->tid = lap_msg->hdr.tid; + cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, + &cm_id_priv->av); + cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, + cm_id_priv); + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irq(&cm_id_priv->lock); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; + +unlock: spin_unlock_irq(&cm_id_priv->lock); +deref: cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_format_apr(struct cm_apr_msg *apr_msg, + struct cm_id_private *cm_id_priv, + enum ib_cm_apr_status status, + void *info, + u8 info_length, + const void *private_data, + u8 private_data_len) +{ + cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid); + apr_msg->local_comm_id = cm_id_priv->id.local_id; + apr_msg->remote_comm_id = cm_id_priv->id.remote_id; + apr_msg->ap_status = (u8) status; + + if (info && info_length) { + apr_msg->info_length = info_length; + memcpy(apr_msg->info, info, info_length); + } + + if (private_data && private_data_len) + memcpy(apr_msg->private_data, private_data, private_data_len); +} + +int ib_send_cm_apr(struct ib_cm_id *cm_id, + enum ib_cm_apr_status status, + void *info, + u8 info_length, + const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + unsigned long flags; + int ret; + + if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) || + (info && info_length > IB_CM_APR_INFO_LENGTH)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_ESTABLISHED || + (cm_id->lap_state != IB_CM_LAP_RCVD && + cm_id->lap_state != IB_CM_MRA_LAP_SENT)) { + ret = -EINVAL; + goto out; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, + info, info_length, private_data, private_data_len); + ret = ib_post_send_mad(msg, NULL); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + + cm_id->lap_state = IB_CM_LAP_IDLE; +out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_apr); + +static int cm_apr_handler(struct cm_work *work) +{ + struct cm_id_private *cm_id_priv; + struct cm_apr_msg *apr_msg; + int ret; + + apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id, + apr_msg->local_comm_id); + if (!cm_id_priv) + return -EINVAL; /* Unmatched reply. 
*/ + + work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status; + work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info; + work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length; + work->cm_event.private_data = &apr_msg->private_data; + + spin_lock_irq(&cm_id_priv->lock); + if (cm_id_priv->id.state != IB_CM_ESTABLISHED || + (cm_id_priv->id.lap_state != IB_CM_LAP_SENT && + cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) { + spin_unlock_irq(&cm_id_priv->lock); + goto out; + } + cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + cm_id_priv->msg = NULL; + + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irq(&cm_id_priv->lock); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static int cm_timewait_handler(struct cm_work *work) +{ + struct cm_timewait_info *timewait_info; + struct cm_id_private *cm_id_priv; + int ret; + + timewait_info = (struct cm_timewait_info *)work; + spin_lock_irq(&cm.lock); + list_del(&timewait_info->list); + spin_unlock_irq(&cm.lock); + + cm_id_priv = cm_acquire_id(timewait_info->work.local_id, + timewait_info->work.remote_id); + if (!cm_id_priv) + return -EINVAL; + + spin_lock_irq(&cm_id_priv->lock); + if (cm_id_priv->id.state != IB_CM_TIMEWAIT || + cm_id_priv->remote_qpn != timewait_info->remote_qpn) { + spin_unlock_irq(&cm_id_priv->lock); + goto out; + } + cm_id_priv->id.state = IB_CM_IDLE; + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irq(&cm_id_priv->lock); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, + struct cm_id_private *cm_id_priv, + struct ib_cm_sidr_req_param *param) +{ + cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, + cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); + sidr_req_msg->request_id = cm_id_priv->id.local_id; + sidr_req_msg->pkey = param->path->pkey; + sidr_req_msg->service_id = param->service_id; + + if (param->private_data && param->private_data_len) + memcpy(sidr_req_msg->private_data, param->private_data, + param->private_data_len); +} + +int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, + struct ib_cm_sidr_req_param *param) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + unsigned long flags; + int ret; + + if (!param->path || (param->private_data && + param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); + if (ret) + goto out; + + cm_id->service_id = param->service_id; + cm_id->service_mask = ~cpu_to_be64(0); + cm_id_priv->timeout_ms = param->timeout_ms; + cm_id_priv->max_cm_retries = param->max_cm_retries; + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto out; + + cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, + param); + msg->timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state == IB_CM_IDLE) + ret = ib_post_send_mad(msg, NULL); + else + ret = -EINVAL; + + if (ret) { + 
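/* Bad state or failed post: drop the lock, then release the MAD. */ +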
spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + goto out; + } + cm_id->state = IB_CM_SIDR_REQ_SENT; + cm_id_priv->msg = msg; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); +out: + return ret; +} +EXPORT_SYMBOL(ib_send_cm_sidr_req); + +static void cm_format_sidr_req_event(struct cm_work *work, + struct ib_cm_id *listen_id) +{ + struct cm_sidr_req_msg *sidr_req_msg; + struct ib_cm_sidr_req_event_param *param; + + sidr_req_msg = (struct cm_sidr_req_msg *) + work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.sidr_req_rcvd; + param->pkey = __be16_to_cpu(sidr_req_msg->pkey); + param->listen_id = listen_id; + param->service_id = sidr_req_msg->service_id; + param->bth_pkey = cm_get_bth_pkey(work); + param->port = work->port->port_num; + work->cm_event.private_data = &sidr_req_msg->private_data; +} + +static int cm_sidr_req_handler(struct cm_work *work) +{ + struct ib_cm_id *cm_id; + struct cm_id_private *cm_id_priv, *cur_cm_id_priv; + struct cm_sidr_req_msg *sidr_req_msg; + struct ib_wc *wc; + + cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + + /* Record SGID/SLID and request ID for lookup. */ + sidr_req_msg = (struct cm_sidr_req_msg *) + work->mad_recv_wc->recv_buf.mad; + wc = work->mad_recv_wc->wc; + cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid); + cm_id_priv->av.dgid.global.interface_id = 0; + cm_init_av_for_response(work->port, work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, + &cm_id_priv->av); + cm_id_priv->id.remote_id = sidr_req_msg->request_id; + cm_id_priv->tid = sidr_req_msg->hdr.tid; + atomic_inc(&cm_id_priv->work_count); + + spin_lock_irq(&cm.lock); + cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); + if (cur_cm_id_priv) { + spin_unlock_irq(&cm.lock); + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_SIDR_REQ_COUNTER]); + goto out; /* Duplicate message. */ + } + cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; + cur_cm_id_priv = cm_find_listen(cm_id->device, + sidr_req_msg->service_id); + if (!cur_cm_id_priv) { + spin_unlock_irq(&cm.lock); + cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); + goto out; /* No match. 
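+ * No listener is registered for this service ID; cm_reject_sidr_req() + * above answered with IB_SIDR_UNSUPPORTED.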
*/ + } + atomic_inc(&cur_cm_id_priv->refcount); + atomic_inc(&cm_id_priv->refcount); + spin_unlock_irq(&cm.lock); + + cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; + cm_id_priv->id.context = cur_cm_id_priv->id.context; + cm_id_priv->id.service_id = sidr_req_msg->service_id; + cm_id_priv->id.service_mask = ~cpu_to_be64(0); + + cm_format_sidr_req_event(work, &cur_cm_id_priv->id); + cm_process_work(cm_id_priv, work); + cm_deref_id(cur_cm_id_priv); + return 0; +out: + ib_destroy_cm_id(&cm_id_priv->id); + return -EINVAL; +} + +static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, + struct cm_id_private *cm_id_priv, + struct ib_cm_sidr_rep_param *param) +{ + cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, + cm_id_priv->tid); + sidr_rep_msg->request_id = cm_id_priv->id.remote_id; + sidr_rep_msg->status = param->status; + cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num)); + sidr_rep_msg->service_id = cm_id_priv->id.service_id; + sidr_rep_msg->qkey = cpu_to_be32(param->qkey); + + if (param->info && param->info_length) + memcpy(sidr_rep_msg->info, param->info, param->info_length); + + if (param->private_data && param->private_data_len) + memcpy(sidr_rep_msg->private_data, param->private_data, + param->private_data_len); +} + +int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, + struct ib_cm_sidr_rep_param *param) +{ + struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; + unsigned long flags; + int ret; + + if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) || + (param->private_data && + param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE)) + return -EINVAL; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state != IB_CM_SIDR_REQ_RCVD) { + ret = -EINVAL; + goto error; + } + + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + goto error; + + cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, + param); + ret = ib_post_send_mad(msg, NULL); + if (ret) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_free_msg(msg); + return ret; + } + cm_id->state = IB_CM_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + spin_lock_irqsave(&cm.lock, flags); + if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) { + rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + RB_CLEAR_NODE(&cm_id_priv->sidr_id_node); + } + spin_unlock_irqrestore(&cm.lock, flags); + return 0; + +error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(ib_send_cm_sidr_rep); + +static void cm_format_sidr_rep_event(struct cm_work *work) +{ + struct cm_sidr_rep_msg *sidr_rep_msg; + struct ib_cm_sidr_rep_event_param *param; + + sidr_rep_msg = (struct cm_sidr_rep_msg *) + work->mad_recv_wc->recv_buf.mad; + param = &work->cm_event.param.sidr_rep_rcvd; + param->status = sidr_rep_msg->status; + param->qkey = be32_to_cpu(sidr_rep_msg->qkey); + param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); + param->info = &sidr_rep_msg->info; + param->info_len = sidr_rep_msg->info_length; + work->cm_event.private_data = &sidr_rep_msg->private_data; +} + +static int cm_sidr_rep_handler(struct cm_work *work) +{ + struct cm_sidr_rep_msg *sidr_rep_msg; + struct cm_id_private *cm_id_priv; + + sidr_rep_msg = (struct cm_sidr_rep_msg *) + work->mad_recv_wc->recv_buf.mad; + cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0); + if (!cm_id_priv) + return -EINVAL; /* Unmatched reply. 
*/ + + spin_lock_irq(&cm_id_priv->lock); + if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) { + spin_unlock_irq(&cm_id_priv->lock); + goto out; + } + cm_id_priv->id.state = IB_CM_IDLE; + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + spin_unlock_irq(&cm_id_priv->lock); + + cm_format_sidr_rep_event(work); + cm_process_work(cm_id_priv, work); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + +static void cm_process_send_error(struct ib_mad_send_buf *msg, + enum ib_wc_status wc_status) +{ + struct cm_id_private *cm_id_priv; + struct ib_cm_event cm_event; + enum ib_cm_state state; + int ret; + + memset(&cm_event, 0, sizeof cm_event); + cm_id_priv = msg->context[0]; + + /* Discard old sends or ones without a response. */ + spin_lock_irq(&cm_id_priv->lock); + state = (enum ib_cm_state) (unsigned long) msg->context[1]; + if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) + goto discard; + + switch (state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + cm_reset_to_idle(cm_id_priv); + cm_event.event = IB_CM_REQ_ERROR; + break; + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + cm_reset_to_idle(cm_id_priv); + cm_event.event = IB_CM_REP_ERROR; + break; + case IB_CM_DREQ_SENT: + cm_enter_timewait(cm_id_priv); + cm_event.event = IB_CM_DREQ_ERROR; + break; + case IB_CM_SIDR_REQ_SENT: + cm_id_priv->id.state = IB_CM_IDLE; + cm_event.event = IB_CM_SIDR_REQ_ERROR; + break; + default: + goto discard; + } + spin_unlock_irq(&cm_id_priv->lock); + cm_event.param.send_status = wc_status; + + /* No other events can occur on the cm_id at this point. */ + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event); + cm_free_msg(msg); + if (ret) + ib_destroy_cm_id(&cm_id_priv->id); + return; +discard: + spin_unlock_irq(&cm_id_priv->lock); + cm_free_msg(msg); +} + +static void cm_send_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_send_wc) +{ + struct ib_mad_send_buf *msg = mad_send_wc->send_buf; + struct cm_port *port; + u16 attr_index; + + port = mad_agent->context; + attr_index = be16_to_cpu(((struct ib_mad_hdr *) + msg->mad)->attr_id) - CM_ATTR_ID_OFFSET; + + /* + * If the send was in response to a received message (context[0] is not + * set to a cm_id), and is not a REJ, then it is a send that was + * manually retried. + */ + if (!msg->context[0] && (attr_index != CM_REJ_COUNTER)) + msg->retries = 1; + + atomic_long_add(1 + msg->retries, + &port->counter_group[CM_XMIT].counter[attr_index]); + if (msg->retries) + atomic_long_add(msg->retries, + &port->counter_group[CM_XMIT_RETRIES]. 
+ counter[attr_index]); + + switch (mad_send_wc->status) { + case IB_WC_SUCCESS: + case IB_WC_WR_FLUSH_ERR: + cm_free_msg(msg); + break; + default: + if (msg->context[0] && msg->context[1]) + cm_process_send_error(msg, mad_send_wc->status); + else + cm_free_msg(msg); + break; + } +} + +static void cm_work_handler(struct work_struct *_work) +{ + struct cm_work *work = container_of(_work, struct cm_work, work.work); + int ret; + + switch (work->cm_event.event) { + case IB_CM_REQ_RECEIVED: + ret = cm_req_handler(work); + break; + case IB_CM_MRA_RECEIVED: + ret = cm_mra_handler(work); + break; + case IB_CM_REJ_RECEIVED: + ret = cm_rej_handler(work); + break; + case IB_CM_REP_RECEIVED: + ret = cm_rep_handler(work); + break; + case IB_CM_RTU_RECEIVED: + ret = cm_rtu_handler(work); + break; + case IB_CM_USER_ESTABLISHED: + ret = cm_establish_handler(work); + break; + case IB_CM_DREQ_RECEIVED: + ret = cm_dreq_handler(work); + break; + case IB_CM_DREP_RECEIVED: + ret = cm_drep_handler(work); + break; + case IB_CM_SIDR_REQ_RECEIVED: + ret = cm_sidr_req_handler(work); + break; + case IB_CM_SIDR_REP_RECEIVED: + ret = cm_sidr_rep_handler(work); + break; + case IB_CM_LAP_RECEIVED: + ret = cm_lap_handler(work); + break; + case IB_CM_APR_RECEIVED: + ret = cm_apr_handler(work); + break; + case IB_CM_TIMEWAIT_EXIT: + ret = cm_timewait_handler(work); + break; + default: + ret = -EINVAL; + break; + } + if (ret) + cm_free_work(work); +} + +static int cm_establish(struct ib_cm_id *cm_id) +{ + struct cm_id_private *cm_id_priv; + struct cm_work *work; + unsigned long flags; + int ret = 0; + struct cm_device *cm_dev; + + cm_dev = ib_get_client_data(cm_id->device, &cm_client); + if (!cm_dev) + return -ENODEV; + + work = kmalloc(sizeof *work, GFP_ATOMIC); + if (!work) + return -ENOMEM; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id->state) + { + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + cm_id->state = IB_CM_ESTABLISHED; + break; + case IB_CM_ESTABLISHED: + ret = -EISCONN; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) { + kfree(work); + goto out; + } + + /* + * The CM worker thread may try to destroy the cm_id before it + * can execute this work item. To prevent potential deadlock, + * we need to find the cm_id once we're in the context of the + * worker thread, rather than holding a reference on it. 
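+ * The work item below therefore records only the local and remote IDs, + * and the handler re-acquires the cm_id from those when it runs. A + * typical trigger (sketch, not part of this change): a consumer that + * sees the first message arrive on a passive QP calls + * ib_cm_notify(cm_id, IB_EVENT_COMM_EST), which reaches this function + * through the switch in ib_cm_notify() below.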
+ */ + INIT_DELAYED_WORK(&work->work, cm_work_handler); + work->local_id = cm_id->local_id; + work->remote_id = cm_id->remote_id; + work->mad_recv_wc = NULL; + work->cm_event.event = IB_CM_USER_ESTABLISHED; + + /* Check if the device started its remove_one */ + spin_lock_irqsave(&cm.lock, flags); + if (!cm_dev->going_down) { + queue_delayed_work(cm.wq, &work->work, 0); + } else { + kfree(work); + ret = -ENODEV; + } + spin_unlock_irqrestore(&cm.lock, flags); + +out: + return ret; +} + +static int cm_migrate(struct ib_cm_id *cm_id) +{ + struct cm_id_private *cm_id_priv; + struct cm_av tmp_av; + unsigned long flags; + int tmp_send_port_not_ready; + int ret = 0; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id->state == IB_CM_ESTABLISHED && + (cm_id->lap_state == IB_CM_LAP_UNINIT || + cm_id->lap_state == IB_CM_LAP_IDLE)) { + cm_id->lap_state = IB_CM_LAP_IDLE; + /* Swap address vector */ + tmp_av = cm_id_priv->av; + cm_id_priv->av = cm_id_priv->alt_av; + cm_id_priv->alt_av = tmp_av; + /* Swap port send ready state */ + tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready; + cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready; + cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready; + } else + ret = -EINVAL; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + return ret; +} + +int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event) +{ + int ret; + + switch (event) { + case IB_EVENT_COMM_EST: + ret = cm_establish(cm_id); + break; + case IB_EVENT_PATH_MIG: + ret = cm_migrate(cm_id); + break; + default: + ret = -EINVAL; + } + return ret; +} +EXPORT_SYMBOL(ib_cm_notify); + +static void cm_recv_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct cm_port *port = mad_agent->context; + struct cm_work *work; + enum ib_cm_event_type event; + u16 attr_id; + int paths = 0; + int going_down = 0; + + switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { + case CM_REQ_ATTR_ID: + paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)-> + alt_local_lid != 0); + event = IB_CM_REQ_RECEIVED; + break; + case CM_MRA_ATTR_ID: + event = IB_CM_MRA_RECEIVED; + break; + case CM_REJ_ATTR_ID: + event = IB_CM_REJ_RECEIVED; + break; + case CM_REP_ATTR_ID: + event = IB_CM_REP_RECEIVED; + break; + case CM_RTU_ATTR_ID: + event = IB_CM_RTU_RECEIVED; + break; + case CM_DREQ_ATTR_ID: + event = IB_CM_DREQ_RECEIVED; + break; + case CM_DREP_ATTR_ID: + event = IB_CM_DREP_RECEIVED; + break; + case CM_SIDR_REQ_ATTR_ID: + event = IB_CM_SIDR_REQ_RECEIVED; + break; + case CM_SIDR_REP_ATTR_ID: + event = IB_CM_SIDR_REP_RECEIVED; + break; + case CM_LAP_ATTR_ID: + paths = 1; + event = IB_CM_LAP_RECEIVED; + break; + case CM_APR_ATTR_ID: + event = IB_CM_APR_RECEIVED; + break; + default: + ib_free_recv_mad(mad_recv_wc); + return; + } + + attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id); + atomic_long_inc(&port->counter_group[CM_RECV]. 
+ counter[attr_id - CM_ATTR_ID_OFFSET]); + + work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, + GFP_KERNEL); + if (!work) { + ib_free_recv_mad(mad_recv_wc); + return; + } + + INIT_DELAYED_WORK(&work->work, cm_work_handler); + work->cm_event.event = event; + work->mad_recv_wc = mad_recv_wc; + work->port = port; + + /* Check if the device started its remove_one */ + spin_lock_irq(&cm.lock); + if (!port->cm_dev->going_down) + queue_delayed_work(cm.wq, &work->work, 0); + else + going_down = 1; + spin_unlock_irq(&cm.lock); + + if (going_down) { + kfree(work); + ib_free_recv_mad(mad_recv_wc); + } +} + +static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + case IB_CM_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | IB_QP_PORT; + qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; + if (cm_id_priv->responder_resources) + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_ATOMIC; + qp_attr->pkey_index = cm_id_priv->av.pkey_index; + qp_attr->port_num = cm_id_priv->av.port->port_num; + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + case IB_CM_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | + IB_QP_DEST_QPN | IB_QP_RQ_PSN; + qp_attr->ah_attr = cm_id_priv->av.ah_attr; + qp_attr->path_mtu = cm_id_priv->path_mtu; + qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); + qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); + if (cm_id_priv->qp_type == IB_QPT_RC || + cm_id_priv->qp_type == IB_QPT_XRC_TGT) { + *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER; + qp_attr->max_dest_rd_atomic = + cm_id_priv->responder_resources; + qp_attr->min_rnr_timer = 0; + } + if (cm_id_priv->alt_av.ah_attr.dlid) { + *qp_attr_mask |= IB_QP_ALT_PATH; + qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; + qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; + qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; + qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; + } + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->id.state) { + /* Allow transition to RTS before sending REP */ + case IB_CM_REQ_RCVD: + case IB_CM_MRA_REQ_SENT: + + case IB_CM_REP_RCVD: + case IB_CM_MRA_REP_SENT: + case IB_CM_REP_SENT: + case IB_CM_MRA_REP_RCVD: + case IB_CM_ESTABLISHED: + if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) { + *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; + 
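/* + * lap_state == IB_CM_LAP_UNINIT means no alternate path has been + * loaded, so this is the initial transition to RTS and the full + * send-side attribute set is programmed; the else branch below only + * reloads the alternate path and rearms path migration. + */ +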
qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); + switch (cm_id_priv->qp_type) { + case IB_QPT_RC: + case IB_QPT_XRC_INI: + *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC; + qp_attr->retry_cnt = cm_id_priv->retry_count; + qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; + qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + /* fall through */ + case IB_QPT_XRC_TGT: + *qp_attr_mask |= IB_QP_TIMEOUT; + qp_attr->timeout = cm_id_priv->av.timeout; + break; + default: + break; + } + if (cm_id_priv->alt_av.ah_attr.dlid) { + *qp_attr_mask |= IB_QP_PATH_MIG_STATE; + qp_attr->path_mig_state = IB_MIG_REARM; + } + } else { + *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; + qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; + qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; + qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; + qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; + qp_attr->path_mig_state = IB_MIG_REARM; + } + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + struct cm_id_private *cm_id_priv; + int ret; + + cm_id_priv = container_of(cm_id, struct cm_id_private, id); + switch (qp_attr->qp_state) { + case IB_QPS_INIT: + ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask); + break; + case IB_QPS_RTR: + ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask); + break; + case IB_QPS_RTS: + ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} +EXPORT_SYMBOL(ib_cm_init_qp_attr); + +static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, + char *buf) +{ + struct cm_counter_group *group; + struct cm_counter_attribute *cm_attr; + + group = container_of(obj, struct cm_counter_group, obj); + cm_attr = container_of(attr, struct cm_counter_attribute, attr); + + return sprintf(buf, "%ld\n", + atomic_long_read(&group->counter[cm_attr->index])); +} + +static const struct sysfs_ops cm_counter_ops = { + .show = cm_show_counter +}; + +static struct kobj_type cm_counter_obj_type = { + .sysfs_ops = &cm_counter_ops, + .default_attrs = cm_counter_default_attrs +}; + +static void cm_release_port_obj(struct kobject *obj) +{ + struct cm_port *cm_port; + + cm_port = container_of(obj, struct cm_port, port_obj); + kfree(cm_port); +} + +static struct kobj_type cm_port_obj_type = { + .release = cm_release_port_obj +}; + +static char *cm_devnode(struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0666; + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} + +struct class cm_class = { + .owner = THIS_MODULE, + .name = "infiniband_cm", + .devnode = cm_devnode, +}; +EXPORT_SYMBOL(cm_class); + +static int cm_create_port_fs(struct cm_port *port) +{ + int i, ret; + + ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, + &port->cm_dev->device->kobj, + "%d", port->port_num); + if (ret) { + kfree(port); + return ret; + } + + for (i = 0; i < CM_COUNTER_GROUPS; i++) { + ret = kobject_init_and_add(&port->counter_group[i].obj, + &cm_counter_obj_type, + &port->port_obj, + "%s", counter_group_names[i]); + if (ret) + goto error; + } + + return 0; + +error: + while (i--) + kobject_put(&port->counter_group[i].obj); + kobject_put(&port->port_obj); + return ret; + +} + +static void cm_remove_port_fs(struct cm_port *port) +{ + int i; + + for (i = 0; i < 
CM_COUNTER_GROUPS; i++) + kobject_put(&port->counter_group[i].obj); + + kobject_put(&port->port_obj); +} + +static void cm_add_one(struct ib_device *ib_device) +{ + struct cm_device *cm_dev; + struct cm_port *port; + struct ib_mad_reg_req reg_req = { + .mgmt_class = IB_MGMT_CLASS_CM, + .mgmt_class_version = IB_CM_CLASS_VERSION, + }; + struct ib_port_modify port_modify = { + .set_port_cap_mask = IB_PORT_CM_SUP + }; + unsigned long flags; + int ret; + int count = 0; + u8 i; + + cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) * + ib_device->phys_port_cnt, GFP_KERNEL); + if (!cm_dev) + return; + + cm_dev->ib_device = ib_device; + cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; + cm_dev->going_down = 0; + cm_dev->device = device_create(&cm_class, &ib_device->dev, + MKDEV(0, 0), NULL, + "%s", ib_device->name); + if (IS_ERR(cm_dev->device)) { + kfree(cm_dev); + return; + } + + set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); + for (i = 1; i <= ib_device->phys_port_cnt; i++) { + if (!rdma_cap_ib_cm(ib_device, i)) + continue; + + port = kzalloc(sizeof *port, GFP_KERNEL); + if (!port) + goto error1; + + cm_dev->port[i-1] = port; + port->cm_dev = cm_dev; + port->port_num = i; + + INIT_LIST_HEAD(&port->cm_priv_prim_list); + INIT_LIST_HEAD(&port->cm_priv_altr_list); + + ret = cm_create_port_fs(port); + if (ret) + goto error1; + + port->mad_agent = ib_register_mad_agent(ib_device, i, + IB_QPT_GSI, + &reg_req, + 0, + cm_send_handler, + cm_recv_handler, + port, + 0); + if (IS_ERR(port->mad_agent)) + goto error2; + + ret = ib_modify_port(ib_device, i, 0, &port_modify); + if (ret) + goto error3; + + count++; + } + + if (!count) + goto free; + + ib_set_client_data(ib_device, &cm_client, cm_dev); + + write_lock_irqsave(&cm.device_lock, flags); + list_add_tail(&cm_dev->list, &cm.device_list); + write_unlock_irqrestore(&cm.device_lock, flags); + return; + +error3: + ib_unregister_mad_agent(port->mad_agent); +error2: + cm_remove_port_fs(port); +error1: + port_modify.set_port_cap_mask = 0; + port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; + while (--i) { + if (!rdma_cap_ib_cm(ib_device, i)) + continue; + + port = cm_dev->port[i-1]; + ib_modify_port(ib_device, port->port_num, 0, &port_modify); + ib_unregister_mad_agent(port->mad_agent); + cm_remove_port_fs(port); + } +free: + device_unregister(cm_dev->device); + kfree(cm_dev); +} + +static void cm_remove_one(struct ib_device *ib_device, void *client_data) +{ + struct cm_device *cm_dev = client_data; + struct cm_port *port; + struct cm_id_private *cm_id_priv; + struct ib_mad_agent *cur_mad_agent; + struct ib_port_modify port_modify = { + .clr_port_cap_mask = IB_PORT_CM_SUP + }; + unsigned long flags; + int i; + + if (!cm_dev) + return; + + write_lock_irqsave(&cm.device_lock, flags); + list_del(&cm_dev->list); + write_unlock_irqrestore(&cm.device_lock, flags); + + spin_lock_irq(&cm.lock); + cm_dev->going_down = 1; + spin_unlock_irq(&cm.lock); + + for (i = 1; i <= ib_device->phys_port_cnt; i++) { + if (!rdma_cap_ib_cm(ib_device, i)) + continue; + + port = cm_dev->port[i-1]; + ib_modify_port(ib_device, port->port_num, 0, &port_modify); + /* Mark all the cm_ids as not valid */ + spin_lock_irq(&cm.lock); + list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list) + cm_id_priv->altr_send_port_not_ready = 1; + list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list) + cm_id_priv->prim_send_port_not_ready = 1; + spin_unlock_irq(&cm.lock); + /* + * We flush the queue here after going_down is set; this + * ensures that no new work will be
queued in the recv handler, + * after that we can call the unregister_mad_agent + */ + flush_workqueue(cm.wq); + spin_lock_irq(&cm.state_lock); + cur_mad_agent = port->mad_agent; + port->mad_agent = NULL; + spin_unlock_irq(&cm.state_lock); + ib_unregister_mad_agent(cur_mad_agent); + cm_remove_port_fs(port); + } + + device_unregister(cm_dev->device); + kfree(cm_dev); +} + +static int __init ib_cm_init(void) +{ + int ret; + + memset(&cm, 0, sizeof cm); + INIT_LIST_HEAD(&cm.device_list); + rwlock_init(&cm.device_lock); + spin_lock_init(&cm.lock); + spin_lock_init(&cm.state_lock); + cm.listen_service_table = RB_ROOT; + cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); + cm.remote_id_table = RB_ROOT; + cm.remote_qp_table = RB_ROOT; + cm.remote_sidr_table = RB_ROOT; + idr_init(&cm.local_id_table); + get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); + INIT_LIST_HEAD(&cm.timewait_list); + + ret = class_register(&cm_class); + if (ret) { + ret = -ENOMEM; + goto error1; + } + + cm.wq = create_workqueue("ib_cm"); + if (!cm.wq) { + ret = -ENOMEM; + goto error2; + } + + ret = ib_register_client(&cm_client); + if (ret) + goto error3; + + return 0; +error3: + destroy_workqueue(cm.wq); +error2: + class_unregister(&cm_class); +error1: + idr_destroy(&cm.local_id_table); + return ret; +} + +static void __exit ib_cm_cleanup(void) +{ + struct cm_timewait_info *timewait_info, *tmp; + + spin_lock_irq(&cm.lock); + list_for_each_entry(timewait_info, &cm.timewait_list, list) + cancel_delayed_work(&timewait_info->work.work); + spin_unlock_irq(&cm.lock); + + ib_unregister_client(&cm_client); + destroy_workqueue(cm.wq); + + list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) { + list_del(&timewait_info->list); + kfree(timewait_info); + } + + class_unregister(&cm_class); + idr_destroy(&cm.local_id_table); +} + +module_init_order(ib_cm_init, SI_ORDER_SECOND); +module_exit_order(ib_cm_cleanup, SI_ORDER_FIRST); + Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_cm.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_cma.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_cma.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_cma.c (revision 320592) @@ -0,0 +1,4307 @@ +/* + * Copyright (c) 2005 Voltaire Inc. All rights reserved. + * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. + * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define LINUXKPI_PARAM_PREFIX ibcore_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "core_priv.h" + +MODULE_AUTHOR("Sean Hefty"); +MODULE_DESCRIPTION("Generic RDMA CM Agent"); +MODULE_LICENSE("Dual BSD/GPL"); + +#define CMA_CM_RESPONSE_TIMEOUT 20 +#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000 +#define CMA_MAX_CM_RETRIES 15 +#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) +#define CMA_IBOE_PACKET_LIFETIME 18 + +static const char * const cma_events[] = { + [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", + [RDMA_CM_EVENT_ADDR_ERROR] = "address error", + [RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ", + [RDMA_CM_EVENT_ROUTE_ERROR] = "route error", + [RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request", + [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response", + [RDMA_CM_EVENT_CONNECT_ERROR] = "connect error", + [RDMA_CM_EVENT_UNREACHABLE] = "unreachable", + [RDMA_CM_EVENT_REJECTED] = "rejected", + [RDMA_CM_EVENT_ESTABLISHED] = "established", + [RDMA_CM_EVENT_DISCONNECTED] = "disconnected", + [RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal", + [RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join", + [RDMA_CM_EVENT_MULTICAST_ERROR] = "multicast error", + [RDMA_CM_EVENT_ADDR_CHANGE] = "address change", + [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", +}; + +const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) +{ + size_t index = event; + + return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ? 
+ cma_events[index] : "unrecognized event"; +} +EXPORT_SYMBOL(rdma_event_msg); + +static void cma_add_one(struct ib_device *device); +static void cma_remove_one(struct ib_device *device, void *client_data); + +static struct ib_client cma_client = { + .name = "cma", + .add = cma_add_one, + .remove = cma_remove_one +}; + +static struct ib_sa_client sa_client; +static struct rdma_addr_client addr_client; +static LIST_HEAD(dev_list); +static LIST_HEAD(listen_any_list); +static DEFINE_MUTEX(lock); +static struct workqueue_struct *cma_wq; + +struct cma_pernet { + struct idr tcp_ps; + struct idr udp_ps; + struct idr ipoib_ps; + struct idr ib_ps; +}; + +VNET_DEFINE(struct cma_pernet, cma_pernet); + +static struct cma_pernet *cma_pernet_ptr(struct vnet *vnet) +{ + struct cma_pernet *retval; + + CURVNET_SET_QUIET(vnet); + retval = &VNET(cma_pernet); + CURVNET_RESTORE(); + + return (retval); +} + +static struct idr *cma_pernet_idr(struct vnet *net, enum rdma_port_space ps) +{ + struct cma_pernet *pernet = cma_pernet_ptr(net); + + switch (ps) { + case RDMA_PS_TCP: + return &pernet->tcp_ps; + case RDMA_PS_UDP: + return &pernet->udp_ps; + case RDMA_PS_IPOIB: + return &pernet->ipoib_ps; + case RDMA_PS_IB: + return &pernet->ib_ps; + default: + return NULL; + } +} + +struct cma_device { + struct list_head list; + struct ib_device *device; + struct completion comp; + atomic_t refcount; + struct list_head id_list; + struct sysctl_ctx_list sysctl_ctx; + enum ib_gid_type *default_gid_type; +}; + +struct rdma_bind_list { + enum rdma_port_space ps; + struct hlist_head owners; + unsigned short port; +}; + +struct class_port_info_context { + struct ib_class_port_info *class_port_info; + struct ib_device *device; + struct completion done; + struct ib_sa_query *sa_query; + u8 port_num; +}; + +static int cma_ps_alloc(struct vnet *vnet, enum rdma_port_space ps, + struct rdma_bind_list *bind_list, int snum) +{ + struct idr *idr = cma_pernet_idr(vnet, ps); + + return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); +} + +static struct rdma_bind_list *cma_ps_find(struct vnet *net, + enum rdma_port_space ps, int snum) +{ + struct idr *idr = cma_pernet_idr(net, ps); + + return idr_find(idr, snum); +} + +static void cma_ps_remove(struct vnet *net, enum rdma_port_space ps, int snum) +{ + struct idr *idr = cma_pernet_idr(net, ps); + + idr_remove(idr, snum); +} + +enum { + CMA_OPTION_AFONLY, +}; + +void cma_ref_dev(struct cma_device *cma_dev) +{ + atomic_inc(&cma_dev->refcount); +} + +struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, + void *cookie) +{ + struct cma_device *cma_dev; + struct cma_device *found_cma_dev = NULL; + + mutex_lock(&lock); + + list_for_each_entry(cma_dev, &dev_list, list) + if (filter(cma_dev->device, cookie)) { + found_cma_dev = cma_dev; + break; + } + + if (found_cma_dev) + cma_ref_dev(found_cma_dev); + mutex_unlock(&lock); + return found_cma_dev; +} + +int cma_get_default_gid_type(struct cma_device *cma_dev, + unsigned int port) +{ + if (port < rdma_start_port(cma_dev->device) || + port > rdma_end_port(cma_dev->device)) + return -EINVAL; + + return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)]; +} + +int cma_set_default_gid_type(struct cma_device *cma_dev, + unsigned int port, + enum ib_gid_type default_gid_type) +{ + unsigned long supported_gids; + + if (port < rdma_start_port(cma_dev->device) || + port > rdma_end_port(cma_dev->device)) + return -EINVAL; + + supported_gids = roce_gid_type_mask_support(cma_dev->device, port); + + if (!(supported_gids & 
1 << default_gid_type)) + return -EINVAL; + + cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] = + default_gid_type; + + return 0; +} + +struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) +{ + return cma_dev->device; +} + +/* + * Device removal can occur at any time, so we need extra handling to + * serialize notifying the user of device removal with other callbacks. + * We do this by disabling removal notification while a callback is in progress, + * and reporting it after the callback completes. + */ +struct rdma_id_private { + struct rdma_cm_id id; + + struct rdma_bind_list *bind_list; + struct hlist_node node; + struct list_head list; /* listen_any_list or cma_device.list */ + struct list_head listen_list; /* per device listens */ + struct cma_device *cma_dev; + struct list_head mc_list; + + int internal_id; + enum rdma_cm_state state; + spinlock_t lock; + struct mutex qp_mutex; + + struct completion comp; + atomic_t refcount; + struct mutex handler_mutex; + + int backlog; + int timeout_ms; + struct ib_sa_query *query; + int query_id; + union { + struct ib_cm_id *ib; + struct iw_cm_id *iw; + } cm_id; + + u32 seq_num; + u32 qkey; + u32 qp_num; + pid_t owner; + u32 options; + u8 srq; + u8 tos; + u8 reuseaddr; + u8 afonly; + enum ib_gid_type gid_type; +}; + +struct cma_multicast { + struct rdma_id_private *id_priv; + union { + struct ib_sa_multicast *ib; + } multicast; + struct list_head list; + void *context; + struct sockaddr_storage addr; + struct kref mcref; + bool igmp_joined; + u8 join_state; +}; + +struct cma_work { + struct work_struct work; + struct rdma_id_private *id; + enum rdma_cm_state old_state; + enum rdma_cm_state new_state; + struct rdma_cm_event event; +}; + +struct cma_ndev_work { + struct work_struct work; + struct rdma_id_private *id; + struct rdma_cm_event event; +}; + +struct iboe_mcast_work { + struct work_struct work; + struct rdma_id_private *id; + struct cma_multicast *mc; +}; + +union cma_ip_addr { + struct in6_addr ip6; + struct { + __be32 pad[3]; + __be32 addr; + } ip4; +}; + +struct cma_hdr { + u8 cma_version; + u8 ip_version; /* IP version: 7:4 */ + __be16 port; + union cma_ip_addr src_addr; + union cma_ip_addr dst_addr; +}; + +#define CMA_VERSION 0x00 + +struct cma_req_info { + struct ib_device *device; + int port; + union ib_gid local_gid; + __be64 service_id; + u16 pkey; + bool has_gid:1; +}; + +static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&id_priv->lock, flags); + ret = (id_priv->state == comp); + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} + +static int cma_comp_exch(struct rdma_id_private *id_priv, + enum rdma_cm_state comp, enum rdma_cm_state exch) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&id_priv->lock, flags); + if ((ret = (id_priv->state == comp))) + id_priv->state = exch; + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} + +static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, + enum rdma_cm_state exch) +{ + unsigned long flags; + enum rdma_cm_state old; + + spin_lock_irqsave(&id_priv->lock, flags); + old = id_priv->state; + id_priv->state = exch; + spin_unlock_irqrestore(&id_priv->lock, flags); + return old; +} + +static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) +{ + return hdr->ip_version >> 4; +} + +static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) +{ + hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); +} + +static void
_cma_attach_to_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev) +{ + cma_ref_dev(cma_dev); + id_priv->cma_dev = cma_dev; + id_priv->gid_type = 0; + id_priv->id.device = cma_dev->device; + id_priv->id.route.addr.dev_addr.transport = + rdma_node_get_transport(cma_dev->device->node_type); + list_add_tail(&id_priv->list, &cma_dev->id_list); +} + +static void cma_attach_to_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev) +{ + _cma_attach_to_dev(id_priv, cma_dev); + id_priv->gid_type = + cma_dev->default_gid_type[id_priv->id.port_num - + rdma_start_port(cma_dev->device)]; +} + +void cma_deref_dev(struct cma_device *cma_dev) +{ + if (atomic_dec_and_test(&cma_dev->refcount)) + complete(&cma_dev->comp); +} + +static inline void release_mc(struct kref *kref) +{ + struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); + + kfree(mc->multicast.ib); + kfree(mc); +} + +static void cma_release_dev(struct rdma_id_private *id_priv) +{ + mutex_lock(&lock); + list_del(&id_priv->list); + cma_deref_dev(id_priv->cma_dev); + id_priv->cma_dev = NULL; + mutex_unlock(&lock); +} + +static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) +{ + return (struct sockaddr *) &id_priv->id.route.addr.src_addr; +} + +static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) +{ + return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; +} + +static inline unsigned short cma_family(struct rdma_id_private *id_priv) +{ + return id_priv->id.route.addr.src_addr.ss_family; +} + +static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) +{ + struct ib_sa_mcmember_rec rec; + int ret = 0; + + if (id_priv->qkey) { + if (qkey && id_priv->qkey != qkey) + return -EINVAL; + return 0; + } + + if (qkey) { + id_priv->qkey = qkey; + return 0; + } + + switch (id_priv->id.ps) { + case RDMA_PS_UDP: + case RDMA_PS_IB: + id_priv->qkey = RDMA_UDP_QKEY; + break; + case RDMA_PS_IPOIB: + ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid); + ret = ib_sa_get_mcmember_rec(id_priv->id.device, + id_priv->id.port_num, &rec.mgid, + &rec); + if (!ret) + id_priv->qkey = be32_to_cpu(rec.qkey); + break; + default: + break; + } + return ret; +} + +static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) +{ + dev_addr->dev_type = ARPHRD_INFINIBAND; + rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr); + ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey)); +} + +static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) +{ + int ret; + + if (addr->sa_family != AF_IB) { + ret = rdma_translate_ip(addr, dev_addr, NULL); + } else { + cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); + ret = 0; + } + + return ret; +} + +static inline int cma_validate_port(struct ib_device *device, u8 port, + enum ib_gid_type gid_type, + union ib_gid *gid, int dev_type, + struct vnet *net, + int bound_if_index) +{ + int ret = -ENODEV; + struct net_device *ndev = NULL; + + if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) + return ret; + + if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) + return ret; + + if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { + ndev = dev_get_by_index(net, bound_if_index); + if (ndev && ndev->if_flags & IFF_LOOPBACK) { + pr_info("detected loopback device\n"); + dev_put(ndev); + + if (!device->get_netdev) + return -EOPNOTSUPP; + + ndev = device->get_netdev(device, port); + if (!ndev) + return -ENODEV; + } + } else { 
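/* Only RoCE ports use other GID types; default to IB_GID_TYPE_IB. */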
+ gid_type = IB_GID_TYPE_IB; + } + + ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, + ndev, NULL); + + if (ndev) + dev_put(ndev); + + return ret; +} + +static int cma_acquire_dev(struct rdma_id_private *id_priv, + struct rdma_id_private *listen_id_priv) +{ + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + struct cma_device *cma_dev; + union ib_gid gid, iboe_gid, *gidp; + int ret = -ENODEV; + u8 port; + + if (dev_addr->dev_type != ARPHRD_INFINIBAND && + id_priv->id.ps == RDMA_PS_IPOIB) + return -EINVAL; + + mutex_lock(&lock); + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, + &iboe_gid); + + memcpy(&gid, dev_addr->src_dev_addr + + rdma_addr_gid_offset(dev_addr), sizeof gid); + + if (listen_id_priv) { + cma_dev = listen_id_priv->cma_dev; + port = listen_id_priv->id.port_num; + gidp = rdma_protocol_roce(cma_dev->device, port) ? + &iboe_gid : &gid; + + ret = cma_validate_port(cma_dev->device, port, + rdma_protocol_ib(cma_dev->device, port) ? + IB_GID_TYPE_IB : + listen_id_priv->gid_type, gidp, + dev_addr->dev_type, + dev_addr->net, + dev_addr->bound_dev_if); + if (!ret) { + id_priv->id.port_num = port; + goto out; + } + } + + list_for_each_entry(cma_dev, &dev_list, list) { + for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { + if (listen_id_priv && + listen_id_priv->cma_dev == cma_dev && + listen_id_priv->id.port_num == port) + continue; + + gidp = rdma_protocol_roce(cma_dev->device, port) ? + &iboe_gid : &gid; + + ret = cma_validate_port(cma_dev->device, port, + rdma_protocol_ib(cma_dev->device, port) ? + IB_GID_TYPE_IB : + cma_dev->default_gid_type[port - 1], + gidp, dev_addr->dev_type, + dev_addr->net, + dev_addr->bound_dev_if); + if (!ret) { + id_priv->id.port_num = port; + goto out; + } + } + } + +out: + if (!ret) + cma_attach_to_dev(id_priv, cma_dev); + + mutex_unlock(&lock); + return ret; +} + +/* + * Select the source IB device and address to reach the destination IB address. 
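+ * An exact GID match is preferred; failing that, the first port found + * on the same subnet prefix is kept as a fallback.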
+ */ +static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev, *cur_dev; + struct sockaddr_ib *addr; + union ib_gid gid, sgid, *dgid; + u16 pkey, index; + u8 p; + int i; + + cma_dev = NULL; + addr = (struct sockaddr_ib *) cma_dst_addr(id_priv); + dgid = (union ib_gid *) &addr->sib_addr; + pkey = ntohs(addr->sib_pkey); + + list_for_each_entry(cur_dev, &dev_list, list) { + for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { + if (!rdma_cap_af_ib(cur_dev->device, p)) + continue; + + if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) + continue; + + for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, + &gid, NULL); + i++) { + if (!memcmp(&gid, dgid, sizeof(gid))) { + cma_dev = cur_dev; + sgid = gid; + id_priv->id.port_num = p; + goto found; + } + + if (!cma_dev && (gid.global.subnet_prefix == + dgid->global.subnet_prefix)) { + cma_dev = cur_dev; + sgid = gid; + id_priv->id.port_num = p; + } + } + } + } + + if (!cma_dev) + return -ENODEV; + +found: + cma_attach_to_dev(id_priv, cma_dev); + addr = (struct sockaddr_ib *) cma_src_addr(id_priv); + memcpy(&addr->sib_addr, &sgid, sizeof sgid); + cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); + return 0; +} + +static void cma_deref_id(struct rdma_id_private *id_priv) +{ + if (atomic_dec_and_test(&id_priv->refcount)) + complete(&id_priv->comp); +} + +struct rdma_cm_id *rdma_create_id(struct vnet *net, + rdma_cm_event_handler event_handler, + void *context, enum rdma_port_space ps, + enum ib_qp_type qp_type) +{ + struct rdma_id_private *id_priv; + + id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); + if (!id_priv) + return ERR_PTR(-ENOMEM); + + id_priv->owner = task_pid_nr(current); + id_priv->state = RDMA_CM_IDLE; + id_priv->id.context = context; + id_priv->id.event_handler = event_handler; + id_priv->id.ps = ps; + id_priv->id.qp_type = qp_type; + spin_lock_init(&id_priv->lock); + mutex_init(&id_priv->qp_mutex); + init_completion(&id_priv->comp); + atomic_set(&id_priv->refcount, 1); + mutex_init(&id_priv->handler_mutex); + INIT_LIST_HEAD(&id_priv->listen_list); + INIT_LIST_HEAD(&id_priv->mc_list); + get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); + id_priv->id.route.addr.dev_addr.net = TD_TO_VNET(curthread); + + return &id_priv->id; +} +EXPORT_SYMBOL(rdma_create_id); + +static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); + if (ret) + return ret; + + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); + if (ret) + return ret; + + qp_attr.qp_state = IB_QPS_RTS; + qp_attr.sq_psn = 0; + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); + + return ret; +} + +static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + return ib_modify_qp(qp, &qp_attr, qp_attr_mask); +} + +int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr) +{ + struct rdma_id_private *id_priv; + struct ib_qp *qp; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id->device != pd->device) + return -EINVAL; + + qp_init_attr->port_num = 
id->port_num; + qp = ib_create_qp(pd, qp_init_attr); + if (IS_ERR(qp)) + return PTR_ERR(qp); + + if (id->qp_type == IB_QPT_UD) + ret = cma_init_ud_qp(id_priv, qp); + else + ret = cma_init_conn_qp(id_priv, qp); + if (ret) + goto err; + + id->qp = qp; + id_priv->qp_num = qp->qp_num; + id_priv->srq = (qp->srq != NULL); + return 0; +err: + ib_destroy_qp(qp); + return ret; +} +EXPORT_SYMBOL(rdma_create_qp); + +void rdma_destroy_qp(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv; + + id_priv = container_of(id, struct rdma_id_private, id); + mutex_lock(&id_priv->qp_mutex); + ib_destroy_qp(id_priv->id.qp); + id_priv->id.qp = NULL; + mutex_unlock(&id_priv->qp_mutex); +} +EXPORT_SYMBOL(rdma_destroy_qp); + +static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + union ib_gid sgid; + + mutex_lock(&id_priv->qp_mutex); + if (!id_priv->id.qp) { + ret = 0; + goto out; + } + + /* Need to update QP attributes from default values. */ + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); + if (ret) + goto out; + + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); + if (ret) + goto out; + + qp_attr.qp_state = IB_QPS_RTR; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); + if (ret) + goto out; + + ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, + qp_attr.ah_attr.grh.sgid_index, &sgid, NULL); + if (ret) + goto out; + + BUG_ON(id_priv->cma_dev->device != id_priv->id.device); + + if (conn_param) + qp_attr.max_dest_rd_atomic = conn_param->responder_resources; + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); +out: + mutex_unlock(&id_priv->qp_mutex); + return ret; +} + +static int cma_modify_qp_rts(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + mutex_lock(&id_priv->qp_mutex); + if (!id_priv->id.qp) { + ret = 0; + goto out; + } + + qp_attr.qp_state = IB_QPS_RTS; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); + if (ret) + goto out; + + if (conn_param) + qp_attr.max_rd_atomic = conn_param->initiator_depth; + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); +out: + mutex_unlock(&id_priv->qp_mutex); + return ret; +} + +static int cma_modify_qp_err(struct rdma_id_private *id_priv) +{ + struct ib_qp_attr qp_attr; + int ret; + + mutex_lock(&id_priv->qp_mutex); + if (!id_priv->id.qp) { + ret = 0; + goto out; + } + + qp_attr.qp_state = IB_QPS_ERR; + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); +out: + mutex_unlock(&id_priv->qp_mutex); + return ret; +} + +static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, + struct ib_qp_attr *qp_attr, int *qp_attr_mask) +{ + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + int ret; + u16 pkey; + + if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) + pkey = 0xffff; + else + pkey = ib_addr_get_pkey(dev_addr); + + ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, + pkey, &qp_attr->pkey_index); + if (ret) + return ret; + + qp_attr->port_num = id_priv->id.port_num; + *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; + + if (id_priv->id.qp_type == IB_QPT_UD) { + ret = cma_set_qkey(id_priv, 0); + if (ret) + return ret; + + qp_attr->qkey = id_priv->qkey; + *qp_attr_mask |= IB_QP_QKEY; + } else { + qp_attr->qp_access_flags = 0; + *qp_attr_mask |= IB_QP_ACCESS_FLAGS; + } + return 0; +} + +int 
rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + struct rdma_id_private *id_priv; + int ret = 0; + + id_priv = container_of(id, struct rdma_id_private, id); + if (rdma_cap_ib_cm(id->device, id->port_num)) { + if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) + ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); + else + ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, + qp_attr_mask); + + if (qp_attr->qp_state == IB_QPS_RTR) + qp_attr->rq_psn = id_priv->seq_num; + } else if (rdma_cap_iw_cm(id->device, id->port_num)) { + if (!id_priv->cm_id.iw) { + qp_attr->qp_access_flags = 0; + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; + } else + ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, + qp_attr_mask); + } else + ret = -ENOSYS; + + return ret; +} +EXPORT_SYMBOL(rdma_init_qp_attr); + +static inline int cma_zero_addr(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: + return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); + case AF_INET6: + return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); + case AF_IB: + return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); + default: + return 0; + } +} + +static inline int cma_loopback_addr(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: + return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); + case AF_INET6: + return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); + case AF_IB: + return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); + default: + return 0; + } +} + +static inline int cma_any_addr(struct sockaddr *addr) +{ + return cma_zero_addr(addr) || cma_loopback_addr(addr); +} + +static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) +{ + if (src->sa_family != dst->sa_family) + return -1; + + switch (src->sa_family) { + case AF_INET: + return ((struct sockaddr_in *) src)->sin_addr.s_addr != + ((struct sockaddr_in *) dst)->sin_addr.s_addr; + case AF_INET6: + return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, + &((struct sockaddr_in6 *) dst)->sin6_addr); + default: + return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, + &((struct sockaddr_ib *) dst)->sib_addr); + } +} + +static __be16 cma_port(struct sockaddr *addr) +{ + struct sockaddr_ib *sib; + + switch (addr->sa_family) { + case AF_INET: + return ((struct sockaddr_in *) addr)->sin_port; + case AF_INET6: + return ((struct sockaddr_in6 *) addr)->sin6_port; + case AF_IB: + sib = (struct sockaddr_ib *) addr; + return htons((u16) (be64_to_cpu(sib->sib_sid) & + be64_to_cpu(sib->sib_sid_mask))); + default: + return 0; + } +} + +static inline int cma_any_port(struct sockaddr *addr) +{ + return !cma_port(addr); +} + +static void cma_save_ib_info(struct sockaddr *src_addr, + struct sockaddr *dst_addr, + struct rdma_cm_id *listen_id, + struct ib_sa_path_rec *path) +{ + struct sockaddr_ib *listen_ib, *ib; + + listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; + if (src_addr) { + ib = (struct sockaddr_ib *)src_addr; + ib->sib_family = AF_IB; + if (path) { + ib->sib_pkey = path->pkey; + ib->sib_flowinfo = path->flow_label; + memcpy(&ib->sib_addr, &path->sgid, 16); + ib->sib_sid = path->service_id; + ib->sib_scope_id = 0; + } else { + ib->sib_pkey = listen_ib->sib_pkey; + ib->sib_flowinfo = listen_ib->sib_flowinfo; + ib->sib_addr = listen_ib->sib_addr; + ib->sib_sid = listen_ib->sib_sid; + ib->sib_scope_id = listen_ib->sib_scope_id; + } + ib->sib_sid_mask = 
cpu_to_be64(0xffffffffffffffffULL); + } + if (dst_addr) { + ib = (struct sockaddr_ib *)dst_addr; + ib->sib_family = AF_IB; + if (path) { + ib->sib_pkey = path->pkey; + ib->sib_flowinfo = path->flow_label; + memcpy(&ib->sib_addr, &path->dgid, 16); + } + } +} + +static void cma_save_ip4_info(struct sockaddr_in *src_addr, + struct sockaddr_in *dst_addr, + struct cma_hdr *hdr, + __be16 local_port) +{ + if (src_addr) { + *src_addr = (struct sockaddr_in) { + .sin_family = AF_INET, + .sin_addr.s_addr = hdr->dst_addr.ip4.addr, + .sin_port = local_port, + }; + } + + if (dst_addr) { + *dst_addr = (struct sockaddr_in) { + .sin_family = AF_INET, + .sin_addr.s_addr = hdr->src_addr.ip4.addr, + .sin_port = hdr->port, + }; + } +} + +static void cma_save_ip6_info(struct sockaddr_in6 *src_addr, + struct sockaddr_in6 *dst_addr, + struct cma_hdr *hdr, + __be16 local_port) +{ + if (src_addr) { + *src_addr = (struct sockaddr_in6) { + .sin6_family = AF_INET6, + .sin6_addr = hdr->dst_addr.ip6, + .sin6_port = local_port, + }; + } + + if (dst_addr) { + *dst_addr = (struct sockaddr_in6) { + .sin6_family = AF_INET6, + .sin6_addr = hdr->src_addr.ip6, + .sin6_port = hdr->port, + }; + } +} + +static u16 cma_port_from_service_id(__be64 service_id) +{ + return (u16)be64_to_cpu(service_id); +} + +static int cma_save_ip_info(struct sockaddr *src_addr, + struct sockaddr *dst_addr, + struct ib_cm_event *ib_event, + __be64 service_id) +{ + struct cma_hdr *hdr; + __be16 port; + + hdr = ib_event->private_data; + if (hdr->cma_version != CMA_VERSION) + return -EINVAL; + + port = htons(cma_port_from_service_id(service_id)); + + switch (cma_get_ip_ver(hdr)) { + case 4: + cma_save_ip4_info((struct sockaddr_in *)src_addr, + (struct sockaddr_in *)dst_addr, hdr, port); + break; + case 6: + cma_save_ip6_info((struct sockaddr_in6 *)src_addr, + (struct sockaddr_in6 *)dst_addr, hdr, port); + break; + default: + return -EAFNOSUPPORT; + } + + return 0; +} + +static int cma_save_net_info(struct sockaddr *src_addr, + struct sockaddr *dst_addr, + struct rdma_cm_id *listen_id, + struct ib_cm_event *ib_event, + sa_family_t sa_family, __be64 service_id) +{ + if (sa_family == AF_IB) { + if (ib_event->event == IB_CM_REQ_RECEIVED) + cma_save_ib_info(src_addr, dst_addr, listen_id, + ib_event->param.req_rcvd.primary_path); + else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) + cma_save_ib_info(src_addr, dst_addr, listen_id, NULL); + return 0; + } + + return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id); +} + +static int cma_save_req_info(const struct ib_cm_event *ib_event, + struct cma_req_info *req) +{ + const struct ib_cm_req_event_param *req_param = + &ib_event->param.req_rcvd; + const struct ib_cm_sidr_req_event_param *sidr_param = + &ib_event->param.sidr_req_rcvd; + + switch (ib_event->event) { + case IB_CM_REQ_RECEIVED: + req->device = req_param->listen_id->device; + req->port = req_param->port; + memcpy(&req->local_gid, &req_param->primary_path->sgid, + sizeof(req->local_gid)); + req->has_gid = true; + req->service_id = req_param->primary_path->service_id; + req->pkey = be16_to_cpu(req_param->primary_path->pkey); + if (req->pkey != req_param->bth_pkey) + pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" + "RDMA CMA: in the future this may cause the request to be dropped\n", + req_param->bth_pkey, req->pkey); + break; + case IB_CM_SIDR_REQ_RECEIVED: + req->device = sidr_param->listen_id->device; + req->port = sidr_param->port; + req->has_gid = false; + req->service_id = 
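/*
 * The CMA private-data header carries the IP version and addresses,
 * while the port travels in the low 16 bits of the IB service ID, so
 * cma_save_ip_info() above reassembles the local port as (sketch):
 *
 *	__be16 port = htons((u16)be64_to_cpu(service_id));
 *
 * and rejects headers whose cma_version does not match CMA_VERSION.
 */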
sidr_param->service_id; + req->pkey = sidr_param->pkey; + if (req->pkey != sidr_param->bth_pkey) + pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" + "RDMA CMA: in the future this may cause the request to be dropped\n", + sidr_param->bth_pkey, req->pkey); + break; + default: + return -EINVAL; + } + + return 0; +} + +static bool validate_ipv4_net_dev(struct net_device *net_dev, + const struct sockaddr_in *dst_addr, + const struct sockaddr_in *src_addr) +{ +#ifdef INET + struct sockaddr_in dst_tmp = *dst_addr; + __be32 daddr = dst_addr->sin_addr.s_addr, + saddr = src_addr->sin_addr.s_addr; + struct net_device *src_dev; + struct rtentry *rte; + bool ret; + + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || + ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) || + ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) || + ipv4_is_loopback(saddr)) + return false; + + src_dev = ip_dev_find(net_dev->if_vnet, saddr); + if (src_dev != net_dev) + return false; + + /* + * Make sure the socket address length field + * is set, else rtalloc1() will fail. + */ + dst_tmp.sin_len = sizeof(dst_tmp); + + CURVNET_SET(net_dev->if_vnet); + rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0); + CURVNET_RESTORE(); + if (rte != NULL) { + ret = (rte->rt_ifp == net_dev); + RTFREE_LOCKED(rte); + } else { + ret = false; + } + return ret; +#else + return false; +#endif +} + +static bool validate_ipv6_net_dev(struct net_device *net_dev, + const struct sockaddr_in6 *dst_addr, + const struct sockaddr_in6 *src_addr) +{ +#ifdef INET6 + struct sockaddr_in6 dst_tmp = *dst_addr; + struct in6_addr in6_addr = src_addr->sin6_addr; + struct net_device *src_dev; + struct rtentry *rte; + bool ret; + + /* embed scope ID */ + in6_addr.s6_addr[3] = src_addr->sin6_scope_id; + + src_dev = ip6_dev_find(net_dev->if_vnet, in6_addr); + if (src_dev != net_dev) + return false; + + /* + * Make sure the socket address length field + * is set, else rtalloc1() will fail. + */ + dst_tmp.sin6_len = sizeof(dst_tmp); + + CURVNET_SET(net_dev->if_vnet); + rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0); + CURVNET_RESTORE(); + if (rte != NULL) { + ret = (rte->rt_ifp == net_dev); + RTFREE_LOCKED(rte); + } else { + ret = false; + } + return ret; +#else + return false; +#endif +} + +static bool validate_net_dev(struct net_device *net_dev, + const struct sockaddr *daddr, + const struct sockaddr *saddr) +{ + const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr; + const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr; + const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr; + const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr; + + switch (daddr->sa_family) { + case AF_INET: + return saddr->sa_family == AF_INET && + validate_ipv4_net_dev(net_dev, daddr4, saddr4); + + case AF_INET6: + return saddr->sa_family == AF_INET6 && + validate_ipv6_net_dev(net_dev, daddr6, saddr6); + + default: + return false; + } +} + +static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, + const struct cma_req_info *req) +{ + struct sockaddr_storage listen_addr_storage, src_addr_storage; + struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage, + *src_addr = (struct sockaddr *)&src_addr_storage; + struct net_device *net_dev; + const union ib_gid *gid = req->has_gid ? 
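/*
 * validate_ipv4_net_dev() and validate_ipv6_net_dev() above share the
 * same FreeBSD routing pattern: look up the owner of the source address
 * with ip_dev_find()/ip6_dev_find(), then confirm the destination routes
 * out the same interface. Sketch of the shared core ("dst_tmp" assumed
 * prepared with its sa_len set, as in the code):
 *
 *	CURVNET_SET(net_dev->if_vnet);
 *	rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0);
 *	CURVNET_RESTORE();
 *	if (rte != NULL) {
 *		ret = (rte->rt_ifp == net_dev);
 *		RTFREE_LOCKED(rte);
 *	}
 */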
&req->local_gid : NULL; + int err; + + err = cma_save_ip_info(listen_addr, src_addr, ib_event, + req->service_id); + if (err) + return ERR_PTR(err); + + net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey, + gid, listen_addr); + if (!net_dev) + return ERR_PTR(-ENODEV); + + if (!validate_net_dev(net_dev, listen_addr, src_addr)) { + dev_put(net_dev); + return ERR_PTR(-EHOSTUNREACH); + } + + return net_dev; +} + +static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id) +{ + return (be64_to_cpu(service_id) >> 16) & 0xffff; +} + +static bool cma_match_private_data(struct rdma_id_private *id_priv, + const struct cma_hdr *hdr) +{ + struct sockaddr *addr = cma_src_addr(id_priv); + __be32 ip4_addr; + struct in6_addr ip6_addr; + + if (cma_any_addr(addr) && !id_priv->afonly) + return true; + + switch (addr->sa_family) { + case AF_INET: + ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr; + if (cma_get_ip_ver(hdr) != 4) + return false; + if (!cma_any_addr(addr) && + hdr->dst_addr.ip4.addr != ip4_addr) + return false; + break; + case AF_INET6: + ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr; + if (cma_get_ip_ver(hdr) != 6) + return false; + if (!cma_any_addr(addr) && + memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr))) + return false; + break; + case AF_IB: + return true; + default: + return false; + } + + return true; +} + +static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num) +{ + enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num); + enum rdma_transport_type transport = + rdma_node_get_transport(device->node_type); + + return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB; +} + +static bool cma_protocol_roce(const struct rdma_cm_id *id) +{ + struct ib_device *device = id->device; + const int port_num = id->port_num ?: rdma_start_port(device); + + return cma_protocol_roce_dev_port(device, port_num); +} + +static bool cma_match_net_dev(const struct rdma_cm_id *id, + const struct net_device *net_dev, + u8 port_num) +{ + const struct rdma_addr *addr = &id->route.addr; + + if (!net_dev) + /* This request is an AF_IB request or a RoCE request */ + return (!id->port_num || id->port_num == port_num) && + (addr->src_addr.ss_family == AF_IB || + cma_protocol_roce_dev_port(id->device, port_num)); + + return !addr->dev_addr.bound_dev_if || + (net_eq(dev_net(net_dev), addr->dev_addr.net) && + addr->dev_addr.bound_dev_if == net_dev->if_index); +} + +static struct rdma_id_private *cma_find_listener( + const struct rdma_bind_list *bind_list, + const struct ib_cm_id *cm_id, + const struct ib_cm_event *ib_event, + const struct cma_req_info *req, + const struct net_device *net_dev) +{ + struct rdma_id_private *id_priv, *id_priv_dev; + + if (!bind_list) + return ERR_PTR(-EINVAL); + + hlist_for_each_entry(id_priv, &bind_list->owners, node) { + if (cma_match_private_data(id_priv, ib_event->private_data)) { + if (id_priv->id.device == cm_id->device && + cma_match_net_dev(&id_priv->id, net_dev, req->port)) + return id_priv; + list_for_each_entry(id_priv_dev, + &id_priv->listen_list, + listen_list) { + if (id_priv_dev->id.device == cm_id->device && + cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) + return id_priv_dev; + } + } + } + + return ERR_PTR(-EINVAL); +} + +static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, + struct ib_cm_event *ib_event, + struct net_device **net_dev) +{ + struct cma_req_info req; + struct rdma_bind_list *bind_list; + struct rdma_id_private *id_priv; + int err; 
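/*
 * Summary of the demultiplex below (a sketch of the data flow, not new
 * behaviour): cma_save_req_info() extracts device, port, P_Key and
 * service ID from the CM event, cma_get_net_dev() maps those to the
 * receiving net_device, and cma_find_listener() then walks the bind list
 * for a listener whose private data and bound device both match:
 *
 *	req -> net_dev -> bind_list(ps, port) -> id_priv
 */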
+ + err = cma_save_req_info(ib_event, &req); + if (err) + return ERR_PTR(err); + + *net_dev = cma_get_net_dev(ib_event, &req); + if (IS_ERR(*net_dev)) { + if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { + /* Assuming the protocol is AF_IB */ + *net_dev = NULL; + } else if (cma_protocol_roce_dev_port(req.device, req.port)) { + /* TODO find the net dev matching the request parameters + * through the RoCE GID table */ + *net_dev = NULL; + } else { + return ERR_CAST(*net_dev); + } + } + + bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, + rdma_ps_from_service_id(req.service_id), + cma_port_from_service_id(req.service_id)); + id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); + if (IS_ERR(id_priv) && *net_dev) { + dev_put(*net_dev); + *net_dev = NULL; + } + + return id_priv; +} + +static inline int cma_user_data_offset(struct rdma_id_private *id_priv) +{ + return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); +} + +static void cma_cancel_route(struct rdma_id_private *id_priv) +{ + if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { + if (id_priv->query) + ib_sa_cancel_query(id_priv->query_id, id_priv->query); + } +} + +static void cma_cancel_listens(struct rdma_id_private *id_priv) +{ + struct rdma_id_private *dev_id_priv; + + /* + * Remove from listen_any_list to prevent added devices from spawning + * additional listen requests. + */ + mutex_lock(&lock); + list_del(&id_priv->list); + + while (!list_empty(&id_priv->listen_list)) { + dev_id_priv = list_entry(id_priv->listen_list.next, + struct rdma_id_private, listen_list); + /* sync with device removal to avoid duplicate destruction */ + list_del_init(&dev_id_priv->list); + list_del(&dev_id_priv->listen_list); + mutex_unlock(&lock); + + rdma_destroy_id(&dev_id_priv->id); + mutex_lock(&lock); + } + mutex_unlock(&lock); +} + +static void cma_cancel_operation(struct rdma_id_private *id_priv, + enum rdma_cm_state state) +{ + switch (state) { + case RDMA_CM_ADDR_QUERY: + rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); + break; + case RDMA_CM_ROUTE_QUERY: + cma_cancel_route(id_priv); + break; + case RDMA_CM_LISTEN: + if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) + cma_cancel_listens(id_priv); + break; + default: + break; + } +} + +static void cma_release_port(struct rdma_id_private *id_priv) +{ + struct rdma_bind_list *bind_list = id_priv->bind_list; + struct vnet *net = id_priv->id.route.addr.dev_addr.net; + + if (!bind_list) + return; + + mutex_lock(&lock); + hlist_del(&id_priv->node); + if (hlist_empty(&bind_list->owners)) { + cma_ps_remove(net, bind_list->ps, bind_list->port); + kfree(bind_list); + } + mutex_unlock(&lock); +} + +static void cma_leave_mc_groups(struct rdma_id_private *id_priv) +{ + struct cma_multicast *mc; + + while (!list_empty(&id_priv->mc_list)) { + mc = container_of(id_priv->mc_list.next, + struct cma_multicast, list); + list_del(&mc->list); + if (rdma_cap_ib_mcast(id_priv->cma_dev->device, + id_priv->id.port_num)) { + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + } else { + if (mc->igmp_joined) { + struct rdma_dev_addr *dev_addr = + &id_priv->id.route.addr.dev_addr; + struct net_device *ndev = NULL; + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(dev_addr->net, + dev_addr->bound_dev_if); + if (ndev) { + dev_put(ndev); + } + } + kref_put(&mc->mcref, release_mc); + } + } +} + +void rdma_destroy_id(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv; + enum rdma_cm_state state; + + id_priv = container_of(id, struct 
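/*
 * Teardown below is ordered deliberately: move the state to
 * RDMA_CM_DESTROYING first (so racing callbacks abort), cancel any
 * outstanding address/route work, cycle handler_mutex to drain an
 * in-flight callback, then destroy the CM id, leave multicast groups,
 * release the device and port, and only free once the refcount drops.
 */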
rdma_id_private, id); + state = cma_exch(id_priv, RDMA_CM_DESTROYING); + cma_cancel_operation(id_priv, state); + + /* + * Wait for any active callback to finish. New callbacks will find + * the id_priv state set to destroying and abort. + */ + mutex_lock(&id_priv->handler_mutex); + mutex_unlock(&id_priv->handler_mutex); + + if (id_priv->cma_dev) { + if (rdma_cap_ib_cm(id_priv->id.device, 1)) { + if (id_priv->cm_id.ib) + ib_destroy_cm_id(id_priv->cm_id.ib); + } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { + if (id_priv->cm_id.iw) + iw_destroy_cm_id(id_priv->cm_id.iw); + } + cma_leave_mc_groups(id_priv); + cma_release_dev(id_priv); + } + + cma_release_port(id_priv); + cma_deref_id(id_priv); + wait_for_completion(&id_priv->comp); + + if (id_priv->internal_id) + cma_deref_id(id_priv->id.context); + + kfree(id_priv->id.route.path_rec); + kfree(id_priv); +} +EXPORT_SYMBOL(rdma_destroy_id); + +static int cma_rep_recv(struct rdma_id_private *id_priv) +{ + int ret; + + ret = cma_modify_qp_rtr(id_priv, NULL); + if (ret) + goto reject; + + ret = cma_modify_qp_rts(id_priv, NULL); + if (ret) + goto reject; + + ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); + if (ret) + goto reject; + + return 0; +reject: + cma_modify_qp_err(id_priv); + ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + return ret; +} + +static void cma_set_rep_event_data(struct rdma_cm_event *event, + struct ib_cm_rep_event_param *rep_data, + void *private_data) +{ + event->param.conn.private_data = private_data; + event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; + event->param.conn.responder_resources = rep_data->responder_resources; + event->param.conn.initiator_depth = rep_data->initiator_depth; + event->param.conn.flow_control = rep_data->flow_control; + event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; + event->param.conn.srq = rep_data->srq; + event->param.conn.qp_num = rep_data->remote_qpn; +} + +static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +{ + struct rdma_id_private *id_priv = cm_id->context; + struct rdma_cm_event event; + int ret = 0; + + mutex_lock(&id_priv->handler_mutex); + if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && + id_priv->state != RDMA_CM_CONNECT) || + (ib_event->event == IB_CM_TIMEWAIT_EXIT && + id_priv->state != RDMA_CM_DISCONNECT)) + goto out; + + memset(&event, 0, sizeof event); + switch (ib_event->event) { + case IB_CM_REQ_ERROR: + case IB_CM_REP_ERROR: + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = -ETIMEDOUT; + break; + case IB_CM_REP_RECEIVED: + if (id_priv->id.qp) { + event.status = cma_rep_recv(id_priv); + event.event = event.status ? 
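/*
 * cma_rep_recv() above is the acceptance path run when a REP arrives and
 * the consumer already attached a QP: it walks the QP through RTR and
 * RTS and then confirms with an RTU; on any failure it moves the QP to
 * error and sends a consumer-defined reject instead. Equivalent sketch:
 *
 *	if (cma_modify_qp_rtr(id_priv, NULL) ||
 *	    cma_modify_qp_rts(id_priv, NULL) ||
 *	    ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0))
 *		goto reject;
 */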
RDMA_CM_EVENT_CONNECT_ERROR : + RDMA_CM_EVENT_ESTABLISHED; + } else { + event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; + } + cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, + ib_event->private_data); + break; + case IB_CM_RTU_RECEIVED: + case IB_CM_USER_ESTABLISHED: + event.event = RDMA_CM_EVENT_ESTABLISHED; + break; + case IB_CM_DREQ_ERROR: + event.status = -ETIMEDOUT; /* fall through */ + case IB_CM_DREQ_RECEIVED: + case IB_CM_DREP_RECEIVED: + if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, + RDMA_CM_DISCONNECT)) + goto out; + event.event = RDMA_CM_EVENT_DISCONNECTED; + break; + case IB_CM_TIMEWAIT_EXIT: + event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; + break; + case IB_CM_MRA_RECEIVED: + /* ignore event */ + goto out; + case IB_CM_REJ_RECEIVED: + cma_modify_qp_err(id_priv); + event.status = ib_event->param.rej_rcvd.reason; + event.event = RDMA_CM_EVENT_REJECTED; + event.param.conn.private_data = ib_event->private_data; + event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; + break; + default: + pr_err("RDMA CMA: unexpected IB CM event: %d\n", + ib_event->event); + goto out; + } + + ret = id_priv->id.event_handler(&id_priv->id, &event); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + id_priv->cm_id.ib = NULL; + cma_exch(id_priv, RDMA_CM_DESTROYING); + mutex_unlock(&id_priv->handler_mutex); + rdma_destroy_id(&id_priv->id); + return ret; + } +out: + mutex_unlock(&id_priv->handler_mutex); + return ret; +} + +static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, + struct ib_cm_event *ib_event, + struct net_device *net_dev) +{ + struct rdma_id_private *id_priv; + struct rdma_cm_id *id; + struct rdma_route *rt; + const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; + const __be64 service_id = + ib_event->param.req_rcvd.primary_path->service_id; + int ret; + + id = rdma_create_id(listen_id->route.addr.dev_addr.net, + listen_id->event_handler, listen_id->context, + listen_id->ps, ib_event->param.req_rcvd.qp_type); + if (IS_ERR(id)) + return NULL; + + id_priv = container_of(id, struct rdma_id_private, id); + if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, + (struct sockaddr *)&id->route.addr.dst_addr, + listen_id, ib_event, ss_family, service_id)) + goto err; + + rt = &id->route; + rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 
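/*
 * (The route keeps its paths as a small array: path_rec[0] is always the
 * primary path from the REQ, and path_rec[1] is filled in only when an
 * alternate path was supplied and num_paths is 2.)
 */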
2 : 1; + rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, + GFP_KERNEL); + if (!rt->path_rec) + goto err; + + rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; + if (rt->num_paths == 2) + rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; + + if (net_dev) { + ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); + if (ret) + goto err; + } else { + if (!cma_protocol_roce(listen_id) && + cma_any_addr(cma_src_addr(id_priv))) { + rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; + rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); + ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); + } else if (!cma_any_addr(cma_src_addr(id_priv))) { + ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); + if (ret) + goto err; + } + } + rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); + + id_priv->state = RDMA_CM_CONNECT; + return id_priv; + +err: + rdma_destroy_id(id); + return NULL; +} + +static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, + struct ib_cm_event *ib_event, + struct net_device *net_dev) +{ + struct rdma_id_private *id_priv; + struct rdma_cm_id *id; + const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; + struct vnet *net = listen_id->route.addr.dev_addr.net; + int ret; + + id = rdma_create_id(net, listen_id->event_handler, listen_id->context, + listen_id->ps, IB_QPT_UD); + if (IS_ERR(id)) + return NULL; + + id_priv = container_of(id, struct rdma_id_private, id); + if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, + (struct sockaddr *)&id->route.addr.dst_addr, + listen_id, ib_event, ss_family, + ib_event->param.sidr_req_rcvd.service_id)) + goto err; + + if (net_dev) { + ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); + if (ret) + goto err; + } else { + if (!cma_any_addr(cma_src_addr(id_priv))) { + ret = cma_translate_addr(cma_src_addr(id_priv), + &id->route.addr.dev_addr); + if (ret) + goto err; + } + } + + id_priv->state = RDMA_CM_CONNECT; + return id_priv; +err: + rdma_destroy_id(id); + return NULL; +} + +static void cma_set_req_event_data(struct rdma_cm_event *event, + struct ib_cm_req_event_param *req_data, + void *private_data, int offset) +{ + event->param.conn.private_data = (char *)private_data + offset; + event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; + event->param.conn.responder_resources = req_data->responder_resources; + event->param.conn.initiator_depth = req_data->initiator_depth; + event->param.conn.flow_control = req_data->flow_control; + event->param.conn.retry_count = req_data->retry_count; + event->param.conn.rnr_retry_count = req_data->rnr_retry_count; + event->param.conn.srq = req_data->srq; + event->param.conn.qp_num = req_data->remote_qpn; +} + +static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) +{ + return (((ib_event->event == IB_CM_REQ_RECEIVED) && + (ib_event->param.req_rcvd.qp_type == id->qp_type)) || + ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && + (id->qp_type == IB_QPT_UD)) || + (!id->qp_type)); +} + +static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +{ + struct rdma_id_private *listen_id, *conn_id = NULL; + struct rdma_cm_event event; + struct net_device *net_dev; + int offset, ret; + + listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); + if (IS_ERR(listen_id)) + return PTR_ERR(listen_id); + + if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { + ret = -EINVAL; + goto net_dev_put; + } + + 
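/*
 * Request dispatch below (sketch): SIDR requests (UD) get a new id from
 * cma_new_udp_id() and report their payload via event.param.ud, while
 * regular REQs go through cma_new_conn_id() and
 * cma_set_req_event_data(). Either way the new conn_id is handed to the
 * listener's handler as RDMA_CM_EVENT_CONNECT_REQUEST with the CMA
 * header stripped from the private data:
 *
 *	offset = cma_user_data_offset(listen_id);	(0 for AF_IB)
 *	private_data = (char *)ib_event->private_data + offset;
 */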
mutex_lock(&listen_id->handler_mutex); + if (listen_id->state != RDMA_CM_LISTEN) { + ret = -ECONNABORTED; + goto err1; + } + + memset(&event, 0, sizeof event); + offset = cma_user_data_offset(listen_id); + event.event = RDMA_CM_EVENT_CONNECT_REQUEST; + if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { + conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); + event.param.ud.private_data = (char *)ib_event->private_data + offset; + event.param.ud.private_data_len = + IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; + } else { + conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); + cma_set_req_event_data(&event, &ib_event->param.req_rcvd, + ib_event->private_data, offset); + } + if (!conn_id) { + ret = -ENOMEM; + goto err1; + } + + mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); + ret = cma_acquire_dev(conn_id, listen_id); + if (ret) + goto err2; + + conn_id->cm_id.ib = cm_id; + cm_id->context = conn_id; + cm_id->cm_handler = cma_ib_handler; + + /* + * Protect against the user destroying conn_id from another thread + * until we're done accessing it. + */ + atomic_inc(&conn_id->refcount); + ret = conn_id->id.event_handler(&conn_id->id, &event); + if (ret) + goto err3; + /* + * Acquire mutex to prevent user executing rdma_destroy_id() + * while we're accessing the cm_id. + */ + mutex_lock(&lock); + if (cma_comp(conn_id, RDMA_CM_CONNECT) && + (conn_id->id.qp_type != IB_QPT_UD)) + ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + mutex_unlock(&lock); + mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); + cma_deref_id(conn_id); + if (net_dev) + dev_put(net_dev); + return 0; + +err3: + cma_deref_id(conn_id); + /* Destroy the CM ID by returning a non-zero value. */ + conn_id->cm_id.ib = NULL; +err2: + cma_exch(conn_id, RDMA_CM_DESTROYING); + mutex_unlock(&conn_id->handler_mutex); +err1: + mutex_unlock(&listen_id->handler_mutex); + if (conn_id) + rdma_destroy_id(&conn_id->id); + +net_dev_put: + if (net_dev) + dev_put(net_dev); + + return ret; +} + +__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) +{ + if (addr->sa_family == AF_IB) + return ((struct sockaddr_ib *) addr)->sib_sid; + + return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); +} +EXPORT_SYMBOL(rdma_get_service_id); + +static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) +{ + struct rdma_id_private *id_priv = iw_id->context; + struct rdma_cm_event event; + int ret = 0; + struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; + struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; + + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != RDMA_CM_CONNECT) + goto out; + + memset(&event, 0, sizeof event); + switch (iw_event->event) { + case IW_CM_EVENT_CLOSE: + event.event = RDMA_CM_EVENT_DISCONNECTED; + break; + case IW_CM_EVENT_CONNECT_REPLY: + memcpy(cma_src_addr(id_priv), laddr, + rdma_addr_size(laddr)); + memcpy(cma_dst_addr(id_priv), raddr, + rdma_addr_size(raddr)); + switch (iw_event->status) { + case 0: + event.event = RDMA_CM_EVENT_ESTABLISHED; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; + break; + case -ECONNRESET: + case -ECONNREFUSED: + event.event = RDMA_CM_EVENT_REJECTED; + break; + case -ETIMEDOUT: + event.event = RDMA_CM_EVENT_UNREACHABLE; + break; + default: + event.event = RDMA_CM_EVENT_CONNECT_ERROR; + break; + } + break; + case IW_CM_EVENT_ESTABLISHED: + event.event = RDMA_CM_EVENT_ESTABLISHED; + 
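/*
 * (Same mapping as the CONNECT_REPLY switch above: status 0 becomes
 * ESTABLISHED, -ECONNRESET/-ECONNREFUSED become REJECTED, -ETIMEDOUT
 * becomes UNREACHABLE, and anything else CONNECT_ERROR.)
 */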
event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; + break; + default: + BUG_ON(1); + } + + event.status = iw_event->status; + event.param.conn.private_data = iw_event->private_data; + event.param.conn.private_data_len = iw_event->private_data_len; + ret = id_priv->id.event_handler(&id_priv->id, &event); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + id_priv->cm_id.iw = NULL; + cma_exch(id_priv, RDMA_CM_DESTROYING); + mutex_unlock(&id_priv->handler_mutex); + rdma_destroy_id(&id_priv->id); + return ret; + } + +out: + mutex_unlock(&id_priv->handler_mutex); + return ret; +} + +static int iw_conn_req_handler(struct iw_cm_id *cm_id, + struct iw_cm_event *iw_event) +{ + struct rdma_cm_id *new_cm_id; + struct rdma_id_private *listen_id, *conn_id; + struct rdma_cm_event event; + int ret = -ECONNABORTED; + struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; + struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; + + listen_id = cm_id->context; + + mutex_lock(&listen_id->handler_mutex); + if (listen_id->state != RDMA_CM_LISTEN) + goto out; + + /* Create a new RDMA id for the new IW CM ID */ + new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, + listen_id->id.event_handler, + listen_id->id.context, + RDMA_PS_TCP, IB_QPT_RC); + if (IS_ERR(new_cm_id)) { + ret = -ENOMEM; + goto out; + } + conn_id = container_of(new_cm_id, struct rdma_id_private, id); + mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); + conn_id->state = RDMA_CM_CONNECT; + + ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL); + if (ret) { + mutex_unlock(&conn_id->handler_mutex); + rdma_destroy_id(new_cm_id); + goto out; + } + + ret = cma_acquire_dev(conn_id, listen_id); + if (ret) { + mutex_unlock(&conn_id->handler_mutex); + rdma_destroy_id(new_cm_id); + goto out; + } + + conn_id->cm_id.iw = cm_id; + cm_id->context = conn_id; + cm_id->cm_handler = cma_iw_handler; + + memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); + memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); + + memset(&event, 0, sizeof event); + event.event = RDMA_CM_EVENT_CONNECT_REQUEST; + event.param.conn.private_data = iw_event->private_data; + event.param.conn.private_data_len = iw_event->private_data_len; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; + + /* + * Protect against the user destroying conn_id from another thread + * until we're done accessing it. 
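 * (The same pattern as in cma_req_handler(): take a reference before
 * invoking the handler, and only cma_deref_id() after handler_mutex is
 * released, so a concurrent rdma_destroy_id() cannot free conn_id
 * underneath us.)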
+ */ + atomic_inc(&conn_id->refcount); + ret = conn_id->id.event_handler(&conn_id->id, &event); + if (ret) { + /* User wants to destroy the CM ID */ + conn_id->cm_id.iw = NULL; + cma_exch(conn_id, RDMA_CM_DESTROYING); + mutex_unlock(&conn_id->handler_mutex); + cma_deref_id(conn_id); + rdma_destroy_id(&conn_id->id); + goto out; + } + + mutex_unlock(&conn_id->handler_mutex); + cma_deref_id(conn_id); + +out: + mutex_unlock(&listen_id->handler_mutex); + return ret; +} + +static int cma_ib_listen(struct rdma_id_private *id_priv) +{ + struct sockaddr *addr; + struct ib_cm_id *id; + __be64 svc_id; + + addr = cma_src_addr(id_priv); + svc_id = rdma_get_service_id(&id_priv->id, addr); + id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); + if (IS_ERR(id)) + return PTR_ERR(id); + id_priv->cm_id.ib = id; + + return 0; +} + +static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) +{ + int ret; + struct iw_cm_id *id; + + id = iw_create_cm_id(id_priv->id.device, + iw_conn_req_handler, + id_priv); + if (IS_ERR(id)) + return PTR_ERR(id); + + id->tos = id_priv->tos; + id_priv->cm_id.iw = id; + + memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), + rdma_addr_size(cma_src_addr(id_priv))); + + ret = iw_cm_listen(id_priv->cm_id.iw, backlog); + + if (ret) { + iw_destroy_cm_id(id_priv->cm_id.iw); + id_priv->cm_id.iw = NULL; + } + + return ret; +} + +static int cma_listen_handler(struct rdma_cm_id *id, + struct rdma_cm_event *event) +{ + struct rdma_id_private *id_priv = id->context; + + id->context = id_priv->id.context; + id->event_handler = id_priv->id.event_handler; + return id_priv->id.event_handler(id, event); +} + +static void cma_listen_on_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev) +{ + struct rdma_id_private *dev_id_priv; + struct rdma_cm_id *id; + struct vnet *net = id_priv->id.route.addr.dev_addr.net; + int ret; + + if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) + return; + + id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, + id_priv->id.qp_type); + if (IS_ERR(id)) + return; + + dev_id_priv = container_of(id, struct rdma_id_private, id); + + dev_id_priv->state = RDMA_CM_ADDR_BOUND; + memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), + rdma_addr_size(cma_src_addr(id_priv))); + + _cma_attach_to_dev(dev_id_priv, cma_dev); + list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); + atomic_inc(&id_priv->refcount); + dev_id_priv->internal_id = 1; + dev_id_priv->afonly = id_priv->afonly; + + ret = rdma_listen(id, id_priv->backlog); + if (ret) + pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", + ret, cma_dev->device->name); +} + +static void cma_listen_on_all(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev; + + mutex_lock(&lock); + list_add_tail(&id_priv->list, &listen_any_list); + list_for_each_entry(cma_dev, &dev_list, list) + cma_listen_on_dev(id_priv, cma_dev); + mutex_unlock(&lock); +} + +void rdma_set_service_type(struct rdma_cm_id *id, int tos) +{ + struct rdma_id_private *id_priv; + + id_priv = container_of(id, struct rdma_id_private, id); + id_priv->tos = (u8) tos; +} +EXPORT_SYMBOL(rdma_set_service_type); + +static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, + void *context) +{ + struct cma_work *work = context; + struct rdma_route *route; + + route = &work->id->id.route; + + if (!status) { + route->num_paths = 1; + *route->path_rec = *path_rec; + } else { + work->old_state = RDMA_CM_ROUTE_QUERY; + 
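/*
 * On failure the query handler reuses the preallocated cma_work to roll
 * the id back from RDMA_CM_ROUTE_QUERY to RDMA_CM_ADDR_RESOLVED and to
 * deliver RDMA_CM_EVENT_ROUTE_ERROR from process context;
 * cma_work_handler() below performs the state exchange and invokes the
 * consumer's event handler.
 */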
work->new_state = RDMA_CM_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; + work->event.status = status; + } + + queue_work(cma_wq, &work->work); +} + +static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, + struct cma_work *work) +{ + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + struct ib_sa_path_rec path_rec; + ib_sa_comp_mask comp_mask; + struct sockaddr_in6 *sin6; + struct sockaddr_ib *sib; + + memset(&path_rec, 0, sizeof path_rec); + rdma_addr_get_sgid(dev_addr, &path_rec.sgid); + rdma_addr_get_dgid(dev_addr, &path_rec.dgid); + path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); + path_rec.numb_path = 1; + path_rec.reversible = 1; + path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); + + comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | + IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | + IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; + + switch (cma_family(id_priv)) { + case AF_INET: + path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); + comp_mask |= IB_SA_PATH_REC_QOS_CLASS; + break; + case AF_INET6: + sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); + path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); + comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; + break; + case AF_IB: + sib = (struct sockaddr_ib *) cma_src_addr(id_priv); + path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); + comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; + break; + } + + id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, + id_priv->id.port_num, &path_rec, + comp_mask, timeout_ms, + GFP_KERNEL, cma_query_handler, + work, &id_priv->query); + + return (id_priv->query_id < 0) ? id_priv->query_id : 0; +} + +static void cma_work_handler(struct work_struct *_work) +{ + struct cma_work *work = container_of(_work, struct cma_work, work); + struct rdma_id_private *id_priv = work->id; + int destroy = 0; + + mutex_lock(&id_priv->handler_mutex); + if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) + goto out; + + if (id_priv->id.event_handler(&id_priv->id, &work->event)) { + cma_exch(id_priv, RDMA_CM_DESTROYING); + destroy = 1; + } +out: + mutex_unlock(&id_priv->handler_mutex); + cma_deref_id(id_priv); + if (destroy) + rdma_destroy_id(&id_priv->id); + kfree(work); +} + +static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) +{ + struct rdma_route *route = &id_priv->id.route; + struct cma_work *work; + int ret; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + + route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); + if (!route->path_rec) { + ret = -ENOMEM; + goto err1; + } + + ret = cma_query_ib_route(id_priv, timeout_ms, work); + if (ret) + goto err2; + + return 0; +err2: + kfree(route->path_rec); + route->path_rec = NULL; +err1: + kfree(work); + return ret; +} + +int rdma_set_ib_paths(struct rdma_cm_id *id, + struct ib_sa_path_rec *path_rec, int num_paths) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, + RDMA_CM_ROUTE_RESOLVED)) + return -EINVAL; + + id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, + GFP_KERNEL); + if (!id->route.path_rec) { + 
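/*
 * (rdma_set_ib_paths() lets a consumer that already holds path records,
 * e.g. from its own SA query, skip rdma_resolve_route(): it moves
 * ADDR_RESOLVED -> ROUTE_RESOLVED and installs a private copy of the
 * records, rolling the state back on allocation failure.)
 */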
ret = -ENOMEM; + goto err; + } + + id->route.num_paths = num_paths; + return 0; +err: + cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); + return ret; +} +EXPORT_SYMBOL(rdma_set_ib_paths); + +static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) +{ + struct cma_work *work; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + queue_work(cma_wq, &work->work); + return 0; +} + +static int iboe_tos_to_sl(struct net_device *ndev, int tos) +{ + /* TODO: Implement this function */ + return 0; +} + +static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, + unsigned long supported_gids, + enum ib_gid_type default_gid) +{ + if ((network_type == RDMA_NETWORK_IPV4 || + network_type == RDMA_NETWORK_IPV6) && + test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) + return IB_GID_TYPE_ROCE_UDP_ENCAP; + + return default_gid; +} + +static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) +{ + struct rdma_route *route = &id_priv->id.route; + struct rdma_addr *addr = &route->addr; + struct cma_work *work; + int ret; + struct net_device *ndev = NULL; + + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + + route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); + if (!route->path_rec) { + ret = -ENOMEM; + goto err1; + } + + route->num_paths = 1; + + if (addr->dev_addr.bound_dev_if) { + unsigned long supported_gids; + + ndev = dev_get_by_index(addr->dev_addr.net, + addr->dev_addr.bound_dev_if); + if (!ndev) { + ret = -ENODEV; + goto err2; + } + + if (ndev->if_flags & IFF_LOOPBACK) { + dev_put(ndev); + if (!id_priv->id.device->get_netdev) { + ret = -EOPNOTSUPP; + goto err2; + } + + ndev = id_priv->id.device->get_netdev(id_priv->id.device, + id_priv->id.port_num); + if (!ndev) { + ret = -ENODEV; + goto err2; + } + } + + route->path_rec->net = ndev->if_vnet; + route->path_rec->ifindex = ndev->if_index; + supported_gids = roce_gid_type_mask_support(id_priv->id.device, + id_priv->id.port_num); + route->path_rec->gid_type = + cma_route_gid_type(addr->dev_addr.network, + supported_gids, + id_priv->gid_type); + } + if (!ndev) { + ret = -ENODEV; + goto err2; + } + + memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); + + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, + &route->path_rec->sgid); + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, + &route->path_rec->dgid); + + /* Use the hint from IP Stack to select GID Type */ + if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network)) + route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network); + if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) + /* TODO: get the hoplimit from the inet/inet6 device */ + route->path_rec->hop_limit = addr->dev_addr.hoplimit; + else + route->path_rec->hop_limit = 1; + route->path_rec->reversible = 1; + route->path_rec->pkey = cpu_to_be16(0xffff); + route->path_rec->mtu_selector = IB_SA_EQ; + route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos); + route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu); + route->path_rec->rate_selector = IB_SA_EQ; + route->path_rec->rate = iboe_get_rate(ndev); + dev_put(ndev); + 
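/*
 * GID type selection above (sketch): start from the id's configured
 * gid_type, upgrade to IB_GID_TYPE_ROCE_UDP_ENCAP (RoCEv2) when the port
 * supports it and the request arrived as RDMA_NETWORK_IPV4/IPV6, then
 * take the IP stack's hint as a floor:
 *
 *	if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
 *		gid_type = ib_network_to_gid_type(addr->dev_addr.network);
 */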
route->path_rec->packet_life_time_selector = IB_SA_EQ; + route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; + if (!route->path_rec->mtu) { + ret = -EINVAL; + goto err2; + } + + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + work->event.status = 0; + + queue_work(cma_wq, &work->work); + + return 0; + +err2: + kfree(route->path_rec); + route->path_rec = NULL; +err1: + kfree(work); + return ret; +} + +int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) + return -EINVAL; + + atomic_inc(&id_priv->refcount); + if (rdma_cap_ib_sa(id->device, id->port_num)) + ret = cma_resolve_ib_route(id_priv, timeout_ms); + else if (rdma_protocol_roce(id->device, id->port_num)) + ret = cma_resolve_iboe_route(id_priv); + else if (rdma_protocol_iwarp(id->device, id->port_num)) + ret = cma_resolve_iw_route(id_priv, timeout_ms); + else + ret = -ENOSYS; + + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); + cma_deref_id(id_priv); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_route); + +static void cma_set_loopback(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: + ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + break; + case AF_INET6: + ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, + 0, 0, 0, htonl(1)); + break; + default: + ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, + 0, 0, 0, htonl(1)); + break; + } +} + +static int cma_bind_loopback(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev, *cur_dev; + struct ib_port_attr port_attr; + union ib_gid gid; + u16 pkey; + int ret; + u8 p; + + cma_dev = NULL; + mutex_lock(&lock); + list_for_each_entry(cur_dev, &dev_list, list) { + if (cma_family(id_priv) == AF_IB && + !rdma_cap_ib_cm(cur_dev->device, 1)) + continue; + + if (!cma_dev) + cma_dev = cur_dev; + + for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { + if (!ib_query_port(cur_dev->device, p, &port_attr) && + port_attr.state == IB_PORT_ACTIVE) { + cma_dev = cur_dev; + goto port_found; + } + } + } + + if (!cma_dev) { + ret = -ENODEV; + goto out; + } + + p = 1; + +port_found: + ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); + if (ret) + goto out; + + ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); + if (ret) + goto out; + + id_priv->id.route.addr.dev_addr.dev_type = + (rdma_protocol_ib(cma_dev->device, p)) ? 
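/*
 * (IB ports get ARPHRD_INFINIBAND and Ethernet/RoCE ports ARPHRD_ETHER
 * here, so later address handling can tell the link types apart.)
 */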
+ ARPHRD_INFINIBAND : ARPHRD_ETHER; + + rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); + ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); + id_priv->id.port_num = p; + cma_attach_to_dev(id_priv, cma_dev); + cma_set_loopback(cma_src_addr(id_priv)); +out: + mutex_unlock(&lock); + return ret; +} + +static void addr_handler(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *dev_addr, void *context) +{ + struct rdma_id_private *id_priv = context; + struct rdma_cm_event event; + + memset(&event, 0, sizeof event); + mutex_lock(&id_priv->handler_mutex); + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, + RDMA_CM_ADDR_RESOLVED)) + goto out; + + memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); + if (!status && !id_priv->cma_dev) + status = cma_acquire_dev(id_priv, NULL); + + if (status) { + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, + RDMA_CM_ADDR_BOUND)) + goto out; + event.event = RDMA_CM_EVENT_ADDR_ERROR; + event.status = status; + } else + event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + + if (id_priv->id.event_handler(&id_priv->id, &event)) { + cma_exch(id_priv, RDMA_CM_DESTROYING); + mutex_unlock(&id_priv->handler_mutex); + cma_deref_id(id_priv); + rdma_destroy_id(&id_priv->id); + return; + } +out: + mutex_unlock(&id_priv->handler_mutex); + cma_deref_id(id_priv); +} + +static int cma_resolve_loopback(struct rdma_id_private *id_priv) +{ + struct cma_work *work; + union ib_gid gid; + int ret; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + if (!id_priv->cma_dev) { + ret = cma_bind_loopback(id_priv); + if (ret) + goto err; + } + + rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + work->old_state = RDMA_CM_ADDR_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + queue_work(cma_wq, &work->work); + return 0; +err: + kfree(work); + return ret; +} + +static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) +{ + struct cma_work *work; + int ret; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + if (!id_priv->cma_dev) { + ret = cma_resolve_ib_dev(id_priv); + if (ret) + goto err; + } + + rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) + &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + work->old_state = RDMA_CM_ADDR_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + queue_work(cma_wq, &work->work); + return 0; +err: + kfree(work); + return ret; +} + +static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr) +{ + if (!src_addr || !src_addr->sa_family) { + src_addr = (struct sockaddr *) &id->route.addr.src_addr; + src_addr->sa_family = dst_addr->sa_family; + if (dst_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; + struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; + src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; + if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr)) + id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *) src_addr)->sib_pkey = + ((struct sockaddr_ib *) dst_addr)->sib_pkey; + } + } + return rdma_bind_addr(id, 
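/*
 * (When the caller passed no source address, cma_bind_addr() above
 * synthesizes a wildcard of the destination's family first, inheriting
 * the IPv6 scope id, or the AF_IB P_Key, from dst_addr before binding.)
 */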
src_addr); +} + +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr, int timeout_ms) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id_priv->state == RDMA_CM_IDLE) { + ret = cma_bind_addr(id, src_addr, dst_addr); + if (ret) + return ret; + } + + if (cma_family(id_priv) != dst_addr->sa_family) + return -EINVAL; + + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) + return -EINVAL; + + atomic_inc(&id_priv->refcount); + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); + if (cma_any_addr(dst_addr)) { + ret = cma_resolve_loopback(id_priv); + } else { + if (dst_addr->sa_family == AF_IB) { + ret = cma_resolve_ib_addr(id_priv); + } else { + ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), + dst_addr, &id->route.addr.dev_addr, + timeout_ms, addr_handler, id_priv); + } + } + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); + cma_deref_id(id_priv); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_addr); + +int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) +{ + struct rdma_id_private *id_priv; + unsigned long flags; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + spin_lock_irqsave(&id_priv->lock, flags); + if (reuse || id_priv->state == RDMA_CM_IDLE) { + id_priv->reuseaddr = reuse; + ret = 0; + } else { + ret = -EINVAL; + } + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(rdma_set_reuseaddr); + +int rdma_set_afonly(struct rdma_cm_id *id, int afonly) +{ + struct rdma_id_private *id_priv; + unsigned long flags; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + spin_lock_irqsave(&id_priv->lock, flags); + if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { + id_priv->options |= (1 << CMA_OPTION_AFONLY); + id_priv->afonly = afonly; + ret = 0; + } else { + ret = -EINVAL; + } + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(rdma_set_afonly); + +static void cma_bind_port(struct rdma_bind_list *bind_list, + struct rdma_id_private *id_priv) +{ + struct sockaddr *addr; + struct sockaddr_ib *sib; + u64 sid, mask; + __be16 port; + + addr = cma_src_addr(id_priv); + port = htons(bind_list->port); + + switch (addr->sa_family) { + case AF_INET: + ((struct sockaddr_in *) addr)->sin_port = port; + break; + case AF_INET6: + ((struct sockaddr_in6 *) addr)->sin6_port = port; + break; + case AF_IB: + sib = (struct sockaddr_ib *) addr; + sid = be64_to_cpu(sib->sib_sid); + mask = be64_to_cpu(sib->sib_sid_mask); + sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); + sib->sib_sid_mask = cpu_to_be64(~0ULL); + break; + } + id_priv->bind_list = bind_list; + hlist_add_head(&id_priv->node, &bind_list->owners); +} + +static int cma_alloc_port(enum rdma_port_space ps, + struct rdma_id_private *id_priv, unsigned short snum) +{ + struct rdma_bind_list *bind_list; + int ret; + + bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); + if (!bind_list) + return -ENOMEM; + + ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, + snum); + if (ret < 0) + goto err; + + bind_list->ps = ps; + bind_list->port = (unsigned short)ret; + cma_bind_port(bind_list, id_priv); + return 0; +err: + kfree(bind_list); + return ret == -ENOSPC ? 
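/*
 * (cma_ps_alloc() reports a full port space as -ENOSPC; the CMA maps
 * that to the sockets-style -EADDRNOTAVAIL that consumers expect.)
 */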
-EADDRNOTAVAIL : ret; +} + +static int cma_alloc_any_port(enum rdma_port_space ps, + struct rdma_id_private *id_priv) +{ + static unsigned int last_used_port; + int low, high, remaining; + unsigned int rover; + struct vnet *net = id_priv->id.route.addr.dev_addr.net; + u32 rand; + + inet_get_local_port_range(net, &low, &high); + remaining = (high - low) + 1; + get_random_bytes(&rand, sizeof(rand)); + rover = rand % remaining + low; +retry: + if (last_used_port != rover && + !cma_ps_find(net, ps, (unsigned short)rover)) { + int ret = cma_alloc_port(ps, id_priv, rover); + /* + * Remember previously used port number in order to avoid + * re-using same port immediately after it is closed. + */ + if (!ret) + last_used_port = rover; + if (ret != -EADDRNOTAVAIL) + return ret; + } + if (--remaining) { + rover++; + if ((rover < low) || (rover > high)) + rover = low; + goto retry; + } + return -EADDRNOTAVAIL; +} + +/* + * Check that the requested port is available. This is called when trying to + * bind to a specific port, or when trying to listen on a bound port. In + * the latter case, the provided id_priv may already be on the bind_list, but + * we still need to check that it's okay to start listening. + */ +static int cma_check_port(struct rdma_bind_list *bind_list, + struct rdma_id_private *id_priv, uint8_t reuseaddr) +{ + struct rdma_id_private *cur_id; + struct sockaddr *addr, *cur_addr; + + addr = cma_src_addr(id_priv); + hlist_for_each_entry(cur_id, &bind_list->owners, node) { + if (id_priv == cur_id) + continue; + + if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && + cur_id->reuseaddr) + continue; + + cur_addr = cma_src_addr(cur_id); + if (id_priv->afonly && cur_id->afonly && + (addr->sa_family != cur_addr->sa_family)) + continue; + + if (cma_any_addr(addr) || cma_any_addr(cur_addr)) + return -EADDRNOTAVAIL; + + if (!cma_addr_cmp(addr, cur_addr)) + return -EADDRINUSE; + } + return 0; +} + +static int cma_use_port(enum rdma_port_space ps, + struct rdma_id_private *id_priv) +{ + struct rdma_bind_list *bind_list; + unsigned short snum; + int ret; + + snum = ntohs(cma_port(cma_src_addr(id_priv))); + if (snum < IPPORT_RESERVED && + priv_check(curthread, PRIV_NETINET_BINDANY) != 0) + return -EACCES; + + bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); + if (!bind_list) { + ret = cma_alloc_port(ps, id_priv, snum); + } else { + ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); + if (!ret) + cma_bind_port(bind_list, id_priv); + } + return ret; +} + +static int cma_bind_listen(struct rdma_id_private *id_priv) +{ + struct rdma_bind_list *bind_list = id_priv->bind_list; + int ret = 0; + + mutex_lock(&lock); + if (bind_list->owners.first->next) + ret = cma_check_port(bind_list, id_priv, 0); + mutex_unlock(&lock); + return ret; +} + +static enum rdma_port_space cma_select_inet_ps( + struct rdma_id_private *id_priv) +{ + switch (id_priv->id.ps) { + case RDMA_PS_TCP: + case RDMA_PS_UDP: + case RDMA_PS_IPOIB: + case RDMA_PS_IB: + return id_priv->id.ps; + default: + + return 0; + } +} + +static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) +{ + enum rdma_port_space ps = 0; + struct sockaddr_ib *sib; + u64 sid_ps, mask, sid; + + sib = (struct sockaddr_ib *) cma_src_addr(id_priv); + mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; + sid = be64_to_cpu(sib->sib_sid) & mask; + + if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { + sid_ps = RDMA_IB_IP_PS_IB; + ps = RDMA_PS_IB; + } else if (((id_priv->id.ps == 
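/*
 * cma_select_ib_ps() below matches the masked service ID against the
 * well-known RDMA_IB_IP_PS_* prefixes to recover the port space, then
 * writes the canonical form back, roughly (sketch):
 *
 *	sib->sib_sid = cpu_to_be64(sid_ps | port);
 *	sib->sib_sid_mask |= cpu_to_be64(RDMA_IB_IP_PS_MASK);
 */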
RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && + (sid == (RDMA_IB_IP_PS_TCP & mask))) { + sid_ps = RDMA_IB_IP_PS_TCP; + ps = RDMA_PS_TCP; + } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && + (sid == (RDMA_IB_IP_PS_UDP & mask))) { + sid_ps = RDMA_IB_IP_PS_UDP; + ps = RDMA_PS_UDP; + } + + if (ps) { + sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); + sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | + be64_to_cpu(sib->sib_sid_mask)); + } + return ps; +} + +static int cma_get_port(struct rdma_id_private *id_priv) +{ + enum rdma_port_space ps; + int ret; + + if (cma_family(id_priv) != AF_IB) + ps = cma_select_inet_ps(id_priv); + else + ps = cma_select_ib_ps(id_priv); + if (!ps) + return -EPROTONOSUPPORT; + + mutex_lock(&lock); + if (cma_any_port(cma_src_addr(id_priv))) + ret = cma_alloc_any_port(ps, id_priv); + else + ret = cma_use_port(ps, id_priv); + mutex_unlock(&lock); + + return ret; +} + +static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, + struct sockaddr *addr) +{ +#ifdef INET6 + struct sockaddr_in6 sin6; + + if (addr->sa_family != AF_INET6) + return 0; + + sin6 = *(struct sockaddr_in6 *)addr; + + if (!(IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr))) + return 0; + + if (sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0) + return -EINVAL; + + dev_addr->bound_dev_if = sin6.sin6_scope_id; +#endif + return 0; +} + +int rdma_listen(struct rdma_cm_id *id, int backlog) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id_priv->state == RDMA_CM_IDLE) { + id->route.addr.src_addr.ss_family = AF_INET; + ret = rdma_bind_addr(id, cma_src_addr(id_priv)); + if (ret) + return ret; + } + + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) + return -EINVAL; + + if (id_priv->reuseaddr) { + ret = cma_bind_listen(id_priv); + if (ret) + goto err; + } + + id_priv->backlog = backlog; + if (id->device) { + if (rdma_cap_ib_cm(id->device, 1)) { + ret = cma_ib_listen(id_priv); + if (ret) + goto err; + } else if (rdma_cap_iw_cm(id->device, 1)) { + ret = cma_iw_listen(id_priv, backlog); + if (ret) + goto err; + } else { + ret = -ENOSYS; + goto err; + } + } else + cma_listen_on_all(id_priv); + + return 0; +err: + id_priv->backlog = 0; + cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); + return ret; +} +EXPORT_SYMBOL(rdma_listen); + +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv; + int ret; + + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && + addr->sa_family != AF_IB) + return -EAFNOSUPPORT; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) + return -EINVAL; + + ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); + if (ret) + goto err1; + + memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); + if (!cma_any_addr(addr)) { + ret = cma_translate_addr(addr, &id->route.addr.dev_addr); + if (ret) + goto err1; + + ret = cma_acquire_dev(id_priv, NULL); + if (ret) + goto err1; + } + + if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { + if (addr->sa_family == AF_INET) + id_priv->afonly = 1; +#ifdef INET6 + else if (addr->sa_family == AF_INET6) { + CURVNET_SET_QUIET(id_priv->id.route.addr.dev_addr.net); + id_priv->afonly = V_ip6_v6only; + CURVNET_RESTORE(); + } +#endif + } + ret = cma_get_port(id_priv); + if (ret) + goto err2; + + return 0; +err2: + if (id_priv->cma_dev) + cma_release_dev(id_priv); 
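/*
 * Typical passive-side usage of the two exported calls above (sketch
 * only; "net", "handler", "ctx", the address and the backlog are
 * assumed placeholders):
 *
 *	struct sockaddr_in sin = {
 *		.sin_len = sizeof(sin),
 *		.sin_family = AF_INET,
 *		.sin_port = htons(18515),
 *	};
 *	id = rdma_create_id(net, handler, ctx, RDMA_PS_TCP, IB_QPT_RC);
 *	if (!IS_ERR(id) &&
 *	    !rdma_bind_addr(id, (struct sockaddr *)&sin))
 *		rdma_listen(id, 10);
 */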
+err1: + cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); + return ret; +} +EXPORT_SYMBOL(rdma_bind_addr); + +static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) +{ + struct cma_hdr *cma_hdr; + + cma_hdr = hdr; + cma_hdr->cma_version = CMA_VERSION; + if (cma_family(id_priv) == AF_INET) { + struct sockaddr_in *src4, *dst4; + + src4 = (struct sockaddr_in *) cma_src_addr(id_priv); + dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); + + cma_set_ip_ver(cma_hdr, 4); + cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + cma_hdr->port = src4->sin_port; + } else if (cma_family(id_priv) == AF_INET6) { + struct sockaddr_in6 *src6, *dst6; + + src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); + dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); + + cma_set_ip_ver(cma_hdr, 6); + cma_hdr->src_addr.ip6 = src6->sin6_addr; + cma_hdr->dst_addr.ip6 = dst6->sin6_addr; + cma_hdr->port = src6->sin6_port; + } + return 0; +} + +static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *ib_event) +{ + struct rdma_id_private *id_priv = cm_id->context; + struct rdma_cm_event event; + struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; + int ret = 0; + + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != RDMA_CM_CONNECT) + goto out; + + memset(&event, 0, sizeof event); + switch (ib_event->event) { + case IB_CM_SIDR_REQ_ERROR: + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = -ETIMEDOUT; + break; + case IB_CM_SIDR_REP_RECEIVED: + event.param.ud.private_data = ib_event->private_data; + event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; + if (rep->status != IB_SIDR_SUCCESS) { + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = ib_event->param.sidr_rep_rcvd.status; + break; + } + ret = cma_set_qkey(id_priv, rep->qkey); + if (ret) { + event.event = RDMA_CM_EVENT_ADDR_ERROR; + event.status = ret; + break; + } + ret = ib_init_ah_from_path(id_priv->id.device, + id_priv->id.port_num, + id_priv->id.route.path_rec, + &event.param.ud.ah_attr); + if (ret) { + event.event = RDMA_CM_EVENT_ADDR_ERROR; + event.status = ret; + break; + } + event.param.ud.qp_num = rep->qpn; + event.param.ud.qkey = rep->qkey; + event.event = RDMA_CM_EVENT_ESTABLISHED; + event.status = 0; + break; + default: + pr_err("RDMA CMA: unexpected IB CM event: %d\n", + ib_event->event); + goto out; + } + + ret = id_priv->id.event_handler(&id_priv->id, &event); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. 
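 When that happens the CMA clears cm_id.ib, flips the state to
 RDMA_CM_DESTROYING and calls rdma_destroy_id() itself, so the
 consumer's handler must not destroy the id again.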
*/ + id_priv->cm_id.ib = NULL; + cma_exch(id_priv, RDMA_CM_DESTROYING); + mutex_unlock(&id_priv->handler_mutex); + rdma_destroy_id(&id_priv->id); + return ret; + } +out: + mutex_unlock(&id_priv->handler_mutex); + return ret; +} + +static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_cm_sidr_req_param req; + struct ib_cm_id *id; + void *private_data; + int offset, ret; + + memset(&req, 0, sizeof req); + offset = cma_user_data_offset(id_priv); + req.private_data_len = offset + conn_param->private_data_len; + if (req.private_data_len < conn_param->private_data_len) + return -EINVAL; + + if (req.private_data_len) { + private_data = kzalloc(req.private_data_len, GFP_ATOMIC); + if (!private_data) + return -ENOMEM; + } else { + private_data = NULL; + } + + if (conn_param->private_data && conn_param->private_data_len) + memcpy((char *)private_data + offset, conn_param->private_data, + conn_param->private_data_len); + + if (private_data) { + ret = cma_format_hdr(private_data, id_priv); + if (ret) + goto out; + req.private_data = private_data; + } + + id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, + id_priv); + if (IS_ERR(id)) { + ret = PTR_ERR(id); + goto out; + } + id_priv->cm_id.ib = id; + + req.path = id_priv->id.route.path_rec; + req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); + req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); + req.max_cm_retries = CMA_MAX_CM_RETRIES; + + ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); + if (ret) { + ib_destroy_cm_id(id_priv->cm_id.ib); + id_priv->cm_id.ib = NULL; + } +out: + kfree(private_data); + return ret; +} + +static int cma_connect_ib(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_cm_req_param req; + struct rdma_route *route; + void *private_data; + struct ib_cm_id *id; + int offset, ret; + + memset(&req, 0, sizeof req); + offset = cma_user_data_offset(id_priv); + req.private_data_len = offset + conn_param->private_data_len; + if (req.private_data_len < conn_param->private_data_len) + return -EINVAL; + + if (req.private_data_len) { + private_data = kzalloc(req.private_data_len, GFP_ATOMIC); + if (!private_data) + return -ENOMEM; + } else { + private_data = NULL; + } + + if (conn_param->private_data && conn_param->private_data_len) + memcpy((char *)private_data + offset, conn_param->private_data, + conn_param->private_data_len); + + id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); + if (IS_ERR(id)) { + ret = PTR_ERR(id); + goto out; + } + id_priv->cm_id.ib = id; + + route = &id_priv->id.route; + if (private_data) { + ret = cma_format_hdr(private_data, id_priv); + if (ret) + goto out; + req.private_data = private_data; + } + + req.primary_path = &route->path_rec[0]; + if (route->num_paths == 2) + req.alternate_path = &route->path_rec[1]; + + req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); + req.qp_num = id_priv->qp_num; + req.qp_type = id_priv->id.qp_type; + req.starting_psn = id_priv->seq_num; + req.responder_resources = conn_param->responder_resources; + req.initiator_depth = conn_param->initiator_depth; + req.flow_control = conn_param->flow_control; + req.retry_count = min_t(u8, 7, conn_param->retry_count); + req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); + req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; + req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; + req.max_cm_retries = CMA_MAX_CM_RETRIES; + req.srq = id_priv->srq ? 
1 : 0; + + ret = ib_send_cm_req(id_priv->cm_id.ib, &req); +out: + if (ret && !IS_ERR(id)) { + ib_destroy_cm_id(id); + id_priv->cm_id.ib = NULL; + } + + kfree(private_data); + return ret; +} + +static int cma_connect_iw(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct iw_cm_id *cm_id; + int ret; + struct iw_cm_conn_param iw_param; + + cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); + + cm_id->tos = id_priv->tos; + id_priv->cm_id.iw = cm_id; + + memcpy(&cm_id->local_addr, cma_src_addr(id_priv), + rdma_addr_size(cma_src_addr(id_priv))); + memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), + rdma_addr_size(cma_dst_addr(id_priv))); + + ret = cma_modify_qp_rtr(id_priv, conn_param); + if (ret) + goto out; + + if (conn_param) { + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; + } else { + memset(&iw_param, 0, sizeof iw_param); + iw_param.qpn = id_priv->qp_num; + } + ret = iw_cm_connect(cm_id, &iw_param); +out: + if (ret) { + iw_destroy_cm_id(cm_id); + id_priv->cm_id.iw = NULL; + } + return ret; +} + +int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) + return -EINVAL; + + if (!id->qp) { + id_priv->qp_num = conn_param->qp_num; + id_priv->srq = conn_param->srq; + } + + if (rdma_cap_ib_cm(id->device, id->port_num)) { + if (id->qp_type == IB_QPT_UD) + ret = cma_resolve_ib_udp(id_priv, conn_param); + else + ret = cma_connect_ib(id_priv, conn_param); + } else if (rdma_cap_iw_cm(id->device, id->port_num)) + ret = cma_connect_iw(id_priv, conn_param); + else + ret = -ENOSYS; + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); + return ret; +} +EXPORT_SYMBOL(rdma_connect); + +static int cma_accept_ib(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_cm_rep_param rep; + int ret; + + ret = cma_modify_qp_rtr(id_priv, conn_param); + if (ret) + goto out; + + ret = cma_modify_qp_rts(id_priv, conn_param); + if (ret) + goto out; + + memset(&rep, 0, sizeof rep); + rep.qp_num = id_priv->qp_num; + rep.starting_psn = id_priv->seq_num; + rep.private_data = conn_param->private_data; + rep.private_data_len = conn_param->private_data_len; + rep.responder_resources = conn_param->responder_resources; + rep.initiator_depth = conn_param->initiator_depth; + rep.failover_accepted = 0; + rep.flow_control = conn_param->flow_control; + rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); + rep.srq = id_priv->srq ? 
1 : 0; + + ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); +out: + return ret; +} + +static int cma_accept_iw(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct iw_cm_conn_param iw_param; + int ret; + + ret = cma_modify_qp_rtr(id_priv, conn_param); + if (ret) + return ret; + + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + if (id_priv->id.qp) { + iw_param.qpn = id_priv->qp_num; + } else + iw_param.qpn = conn_param->qp_num; + + return iw_cm_accept(id_priv->cm_id.iw, &iw_param); +} + +static int cma_send_sidr_rep(struct rdma_id_private *id_priv, + enum ib_cm_sidr_status status, u32 qkey, + const void *private_data, int private_data_len) +{ + struct ib_cm_sidr_rep_param rep; + int ret; + + memset(&rep, 0, sizeof rep); + rep.status = status; + if (status == IB_SIDR_SUCCESS) { + ret = cma_set_qkey(id_priv, qkey); + if (ret) + return ret; + rep.qp_num = id_priv->qp_num; + rep.qkey = id_priv->qkey; + } + rep.private_data = private_data; + rep.private_data_len = private_data_len; + + return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); +} + +int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + + id_priv->owner = task_pid_nr(current); + + if (!cma_comp(id_priv, RDMA_CM_CONNECT)) + return -EINVAL; + + if (!id->qp && conn_param) { + id_priv->qp_num = conn_param->qp_num; + id_priv->srq = conn_param->srq; + } + + if (rdma_cap_ib_cm(id->device, id->port_num)) { + if (id->qp_type == IB_QPT_UD) { + if (conn_param) + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + conn_param->qkey, + conn_param->private_data, + conn_param->private_data_len); + else + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + 0, NULL, 0); + } else { + if (conn_param) + ret = cma_accept_ib(id_priv, conn_param); + else + ret = cma_rep_recv(id_priv); + } + } else if (rdma_cap_iw_cm(id->device, id->port_num)) + ret = cma_accept_iw(id_priv, conn_param); + else + ret = -ENOSYS; + + if (ret) + goto reject; + + return 0; +reject: + cma_modify_qp_err(id_priv); + rdma_reject(id, NULL, 0); + return ret; +} +EXPORT_SYMBOL(rdma_accept); + +int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!id_priv->cm_id.ib) + return -EINVAL; + + switch (id->device->node_type) { + case RDMA_NODE_IB_CA: + ret = ib_cm_notify(id_priv->cm_id.ib, event); + break; + default: + ret = 0; + break; + } + return ret; +} +EXPORT_SYMBOL(rdma_notify); + +int rdma_reject(struct rdma_cm_id *id, const void *private_data, + u8 private_data_len) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!id_priv->cm_id.ib) + return -EINVAL; + + if (rdma_cap_ib_cm(id->device, id->port_num)) { + if (id->qp_type == IB_QPT_UD) + ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, + private_data, private_data_len); + else + ret = ib_send_cm_rej(id_priv->cm_id.ib, + IB_CM_REJ_CONSUMER_DEFINED, NULL, + 0, private_data, private_data_len); + } else if (rdma_cap_iw_cm(id->device, id->port_num)) { + ret = iw_cm_reject(id_priv->cm_id.iw, + private_data, private_data_len); + } else + ret = -ENOSYS; + + return ret; +} +EXPORT_SYMBOL(rdma_reject); + +int rdma_disconnect(struct rdma_cm_id 
*id) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!id_priv->cm_id.ib) + return -EINVAL; + + if (rdma_cap_ib_cm(id->device, id->port_num)) { + ret = cma_modify_qp_err(id_priv); + if (ret) + goto out; + /* Initiate or respond to a disconnect. */ + if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) + ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); + } else if (rdma_cap_iw_cm(id->device, id->port_num)) { + ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); + } else + ret = -EINVAL; + +out: + return ret; +} +EXPORT_SYMBOL(rdma_disconnect); + +static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) +{ + struct rdma_id_private *id_priv; + struct cma_multicast *mc = multicast->context; + struct rdma_cm_event event; + int ret = 0; + + id_priv = mc->id_priv; + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != RDMA_CM_ADDR_BOUND && + id_priv->state != RDMA_CM_ADDR_RESOLVED) + goto out; + + if (!status) + status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); + mutex_lock(&id_priv->qp_mutex); + if (!status && id_priv->id.qp) + status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, + be16_to_cpu(multicast->rec.mlid)); + mutex_unlock(&id_priv->qp_mutex); + + memset(&event, 0, sizeof event); + event.status = status; + event.param.ud.private_data = mc->context; + if (!status) { + struct rdma_dev_addr *dev_addr = + &id_priv->id.route.addr.dev_addr; + struct net_device *ndev = + dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); + enum ib_gid_type gid_type = + id_priv->cma_dev->default_gid_type[id_priv->id.port_num - + rdma_start_port(id_priv->cma_dev->device)]; + + event.event = RDMA_CM_EVENT_MULTICAST_JOIN; + ib_init_ah_from_mcmember(id_priv->id.device, + id_priv->id.port_num, &multicast->rec, + ndev, gid_type, + &event.param.ud.ah_attr); + event.param.ud.qp_num = 0xFFFFFF; + event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); + if (ndev) + dev_put(ndev); + } else + event.event = RDMA_CM_EVENT_MULTICAST_ERROR; + + ret = id_priv->id.event_handler(&id_priv->id, &event); + if (ret) { + cma_exch(id_priv, RDMA_CM_DESTROYING); + mutex_unlock(&id_priv->handler_mutex); + rdma_destroy_id(&id_priv->id); + return 0; + } + +out: + mutex_unlock(&id_priv->handler_mutex); + return 0; +} + +static void cma_set_mgid(struct rdma_id_private *id_priv, + struct sockaddr *addr, union ib_gid *mgid) +{ + unsigned char mc_map[MAX_ADDR_LEN]; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + struct sockaddr_in *sin = (struct sockaddr_in *) addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; + + if (cma_any_addr(addr)) { + memset(mgid, 0, sizeof *mgid); + } else if ((addr->sa_family == AF_INET6) && + ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == + 0xFF10A01B)) { + /* IPv6 address is an SA assigned MGID. 
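The FF1x:A01B::/32 prefix matched above is already a well-formed MGID, so the address is copied into the MGID unchanged instead of being mapped.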
*/ + memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); + } else if (addr->sa_family == AF_IB) { + memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); + } else if (addr->sa_family == AF_INET6) { + ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); + if (id_priv->id.ps == RDMA_PS_UDP) + mc_map[7] = 0x01; /* Use RDMA CM signature */ + *mgid = *(union ib_gid *) (mc_map + 4); + } else { + ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); + if (id_priv->id.ps == RDMA_PS_UDP) + mc_map[7] = 0x01; /* Use RDMA CM signature */ + *mgid = *(union ib_gid *) (mc_map + 4); + } +} + +static void cma_query_sa_classport_info_cb(int status, + struct ib_class_port_info *rec, + void *context) +{ + struct class_port_info_context *cb_ctx = context; + + WARN_ON(!context); + + if (status || !rec) { + pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", + cb_ctx->device->name, cb_ctx->port_num, status); + goto out; + } + + memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); + +out: + complete(&cb_ctx->done); +} + +static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, + struct ib_class_port_info *class_port_info) +{ + struct class_port_info_context *cb_ctx; + int ret; + + cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); + if (!cb_ctx) + return -ENOMEM; + + cb_ctx->device = device; + cb_ctx->class_port_info = class_port_info; + cb_ctx->port_num = port_num; + init_completion(&cb_ctx->done); + + ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num, + CMA_QUERY_CLASSPORT_INFO_TIMEOUT, + GFP_KERNEL, cma_query_sa_classport_info_cb, + cb_ctx, &cb_ctx->sa_query); + if (ret < 0) { + pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", + device->name, port_num, ret); + goto out; + } + + wait_for_completion(&cb_ctx->done); + +out: + kfree(cb_ctx); + return ret; +} + +static int cma_join_ib_multicast(struct rdma_id_private *id_priv, + struct cma_multicast *mc) +{ + struct ib_sa_mcmember_rec rec; + struct ib_class_port_info class_port_info; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + ib_sa_comp_mask comp_mask; + int ret; + + ib_addr_get_mgid(dev_addr, &rec.mgid); + ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, + &rec.mgid, &rec); + if (ret) + return ret; + + ret = cma_set_qkey(id_priv, 0); + if (ret) + return ret; + + cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); + rec.qkey = cpu_to_be32(id_priv->qkey); + rdma_addr_get_sgid(dev_addr, &rec.port_gid); + rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); + rec.join_state = mc->join_state; + + if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { + ret = cma_query_sa_classport_info(id_priv->id.device, + id_priv->id.port_num, + &class_port_info); + + if (ret) + return ret; + + if (!(ib_get_cpi_capmask2(&class_port_info) & + IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { + pr_warn("RDMA CM: %s port %u Unable to multicast join\n" + "RDMA CM: SM doesn't support Send Only Full Member option\n", + id_priv->id.device->name, id_priv->id.port_num); + return -EOPNOTSUPP; + } + } + + comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | + IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | + IB_SA_MCMEMBER_REC_FLOW_LABEL | + IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; + + if (id_priv->id.ps == RDMA_PS_IPOIB) + comp_mask |= IB_SA_MCMEMBER_REC_RATE | + IB_SA_MCMEMBER_REC_RATE_SELECTOR | + IB_SA_MCMEMBER_REC_MTU_SELECTOR | + 
IB_SA_MCMEMBER_REC_MTU | + IB_SA_MCMEMBER_REC_HOP_LIMIT; + + mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, + id_priv->id.port_num, &rec, + comp_mask, GFP_KERNEL, + cma_ib_mc_handler, mc); + return PTR_ERR_OR_ZERO(mc->multicast.ib); +} + +static void iboe_mcast_work_handler(struct work_struct *work) +{ + struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); + struct cma_multicast *mc = mw->mc; + struct ib_sa_multicast *m = mc->multicast.ib; + + mc->multicast.ib->context = mc; + cma_ib_mc_handler(0, m); + kref_put(&mc->mcref, release_mc); + kfree(mw); +} + +static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; + + if (cma_any_addr(addr)) { + memset(mgid, 0, sizeof *mgid); + } else if (addr->sa_family == AF_INET6) { + memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); + } else { + mgid->raw[0] = 0xff; + mgid->raw[1] = 0x0e; + mgid->raw[2] = 0; + mgid->raw[3] = 0; + mgid->raw[4] = 0; + mgid->raw[5] = 0; + mgid->raw[6] = 0; + mgid->raw[7] = 0; + mgid->raw[8] = 0; + mgid->raw[9] = 0; + mgid->raw[10] = 0xff; + mgid->raw[11] = 0xff; + *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; + } +} + +static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, + struct cma_multicast *mc) +{ + struct iboe_mcast_work *work; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + int err = 0; + struct sockaddr *addr = (struct sockaddr *)&mc->addr; + struct net_device *ndev = NULL; + enum ib_gid_type gid_type; + bool send_only; + + send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); + + if (cma_zero_addr((struct sockaddr *)&mc->addr)) + return -EINVAL; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); + if (!mc->multicast.ib) { + err = -ENOMEM; + goto out1; + } + + cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid); + + mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); + if (id_priv->id.ps == RDMA_PS_UDP) + mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); + if (!ndev) { + err = -ENODEV; + goto out2; + } + mc->multicast.ib->rec.rate = iboe_get_rate(ndev); + mc->multicast.ib->rec.hop_limit = 1; + mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu); + + gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - + rdma_start_port(id_priv->cma_dev->device)]; + if (addr->sa_family == AF_INET) { + if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; + if (!send_only) { + mc->igmp_joined = true; + } + } + } else { + if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) + err = -ENOTSUPP; + } + dev_put(ndev); + if (err || !mc->multicast.ib->rec.mtu) { + if (!err) + err = -EINVAL; + goto out2; + } + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, + &mc->multicast.ib->rec.port_gid); + work->id = id_priv; + work->mc = mc; + INIT_WORK(&work->work, iboe_mcast_work_handler); + kref_get(&mc->mcref); + queue_work(cma_wq, &work->work); + + return 0; + +out2: + kfree(mc->multicast.ib); +out1: + kfree(work); + return err; +} + +int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, + u8 join_state, void *context) +{ + struct rdma_id_private *id_priv; + struct cma_multicast *mc; + int ret; + + id_priv = container_of(id, 
struct rdma_id_private, id); + if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && + !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) + return -EINVAL; + + mc = kmalloc(sizeof *mc, GFP_KERNEL); + if (!mc) + return -ENOMEM; + + memcpy(&mc->addr, addr, rdma_addr_size(addr)); + mc->context = context; + mc->id_priv = id_priv; + mc->igmp_joined = false; + mc->join_state = join_state; + spin_lock(&id_priv->lock); + list_add(&mc->list, &id_priv->mc_list); + spin_unlock(&id_priv->lock); + + if (rdma_protocol_roce(id->device, id->port_num)) { + kref_init(&mc->mcref); + ret = cma_iboe_join_multicast(id_priv, mc); + } else if (rdma_cap_ib_mcast(id->device, id->port_num)) + ret = cma_join_ib_multicast(id_priv, mc); + else + ret = -ENOSYS; + + if (ret) { + spin_lock_irq(&id_priv->lock); + list_del(&mc->list); + spin_unlock_irq(&id_priv->lock); + kfree(mc); + } + return ret; +} +EXPORT_SYMBOL(rdma_join_multicast); + +void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv; + struct cma_multicast *mc; + + id_priv = container_of(id, struct rdma_id_private, id); + spin_lock_irq(&id_priv->lock); + list_for_each_entry(mc, &id_priv->mc_list, list) { + if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) { + list_del(&mc->list); + spin_unlock_irq(&id_priv->lock); + + if (id->qp) + ib_detach_mcast(id->qp, + &mc->multicast.ib->rec.mgid, + be16_to_cpu(mc->multicast.ib->rec.mlid)); + + BUG_ON(id_priv->cma_dev->device != id->device); + + if (rdma_cap_ib_mcast(id->device, id->port_num)) { + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + } else if (rdma_protocol_roce(id->device, id->port_num)) { + if (mc->igmp_joined) { + struct rdma_dev_addr *dev_addr = + &id->route.addr.dev_addr; + struct net_device *ndev = NULL; + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(dev_addr->net, + dev_addr->bound_dev_if); + if (ndev) { + dev_put(ndev); + } + mc->igmp_joined = false; + } + kref_put(&mc->mcref, release_mc); + } + return; + } + } + spin_unlock_irq(&id_priv->lock); +} +EXPORT_SYMBOL(rdma_leave_multicast); + +static int +sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS) +{ + struct cma_device *cma_dev = arg1; + const int port = arg2; + char buf[64]; + int error; + + strlcpy(buf, ib_cache_gid_type_str( + cma_get_default_gid_type(cma_dev, port)), sizeof(buf)); + + error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (error != 0 || req->newptr == NULL) + goto done; + + error = ib_cache_gid_parse_type_str(buf); + if (error < 0) { + error = EINVAL; + goto done; + } + + cma_set_default_gid_type(cma_dev, port, error); + error = 0; +done: + return (error); +} + +static void cma_add_one(struct ib_device *device) +{ + struct cma_device *cma_dev; + struct rdma_id_private *id_priv; + unsigned int i; + unsigned long supported_gids = 0; + + cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); + if (!cma_dev) + return; + + sysctl_ctx_init(&cma_dev->sysctl_ctx); + + cma_dev->device = device; + cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, + sizeof(*cma_dev->default_gid_type), + GFP_KERNEL); + if (!cma_dev->default_gid_type) { + kfree(cma_dev); + return; + } + for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { + supported_gids = roce_gid_type_mask_support(device, i); + WARN_ON(!supported_gids); + cma_dev->default_gid_type[i - rdma_start_port(device)] = + find_first_bit(&supported_gids, BITS_PER_LONG); + } + + init_completion(&cma_dev->comp); + atomic_set(&cma_dev->refcount, 1); + INIT_LIST_HEAD(&cma_dev->id_list); + ib_set_client_data(device, 
&cma_client, cma_dev); + + mutex_lock(&lock); + list_add_tail(&cma_dev->list, &dev_list); + list_for_each_entry(id_priv, &listen_any_list, list) + cma_listen_on_dev(id_priv, cma_dev); + mutex_unlock(&lock); + + for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { + char buf[64]; + + snprintf(buf, sizeof(buf), "default_roce_mode_port%d", i); + + (void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx, + SYSCTL_CHILDREN(device->ports_parent->parent->oidp), + OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, + cma_dev, i, &sysctl_cma_default_roce_mode, "A", + "Default RoCE mode. Valid values: IB/RoCE v1 and RoCE v2"); + } +} + +static int cma_remove_id_dev(struct rdma_id_private *id_priv) +{ + struct rdma_cm_event event; + enum rdma_cm_state state; + int ret = 0; + + /* Record that we want to remove the device */ + state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); + if (state == RDMA_CM_DESTROYING) + return 0; + + cma_cancel_operation(id_priv, state); + mutex_lock(&id_priv->handler_mutex); + + /* Check for destruction from another callback. */ + if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) + goto out; + + memset(&event, 0, sizeof event); + event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; + ret = id_priv->id.event_handler(&id_priv->id, &event); +out: + mutex_unlock(&id_priv->handler_mutex); + return ret; +} + +static void cma_process_remove(struct cma_device *cma_dev) +{ + struct rdma_id_private *id_priv; + int ret; + + mutex_lock(&lock); + while (!list_empty(&cma_dev->id_list)) { + id_priv = list_entry(cma_dev->id_list.next, + struct rdma_id_private, list); + + list_del(&id_priv->listen_list); + list_del_init(&id_priv->list); + atomic_inc(&id_priv->refcount); + mutex_unlock(&lock); + + ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv); + cma_deref_id(id_priv); + if (ret) + rdma_destroy_id(&id_priv->id); + + mutex_lock(&lock); + } + mutex_unlock(&lock); + + cma_deref_dev(cma_dev); + wait_for_completion(&cma_dev->comp); +} + +static void cma_remove_one(struct ib_device *device, void *client_data) +{ + struct cma_device *cma_dev = client_data; + + if (!cma_dev) + return; + + mutex_lock(&lock); + list_del(&cma_dev->list); + mutex_unlock(&lock); + + cma_process_remove(cma_dev); + sysctl_ctx_free(&cma_dev->sysctl_ctx); + kfree(cma_dev->default_gid_type); + kfree(cma_dev); +} + +static void cma_init_vnet(void *arg) +{ + struct cma_pernet *pernet = &VNET(cma_pernet); + + idr_init(&pernet->tcp_ps); + idr_init(&pernet->udp_ps); + idr_init(&pernet->ipoib_ps); + idr_init(&pernet->ib_ps); +} +VNET_SYSINIT(cma_init_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_FIRST, cma_init_vnet, NULL); + +static void cma_destroy_vnet(void *arg) +{ + struct cma_pernet *pernet = &VNET(cma_pernet); + + idr_destroy(&pernet->tcp_ps); + idr_destroy(&pernet->udp_ps); + idr_destroy(&pernet->ipoib_ps); + idr_destroy(&pernet->ib_ps); +} +VNET_SYSUNINIT(cma_destroy_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_SECOND, cma_destroy_vnet, NULL); + +static int __init cma_init(void) +{ + int ret; + + cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); + if (!cma_wq) + return -ENOMEM; + + ib_sa_register_client(&sa_client); + rdma_addr_register_client(&addr_client); + + ret = ib_register_client(&cma_client); + if (ret) + goto err; + + cma_configfs_init(); + + return 0; + +err: + rdma_addr_unregister_client(&addr_client); + ib_sa_unregister_client(&sa_client); + destroy_workqueue(cma_wq); + return ret; +} + +static void __exit cma_cleanup(void) +{ + cma_configfs_exit(); + ib_unregister_client(&cma_client); + 
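	/* Tear down in the reverse order of cma_init() registration. */
+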
rdma_addr_unregister_client(&addr_client); + ib_sa_unregister_client(&sa_client); + destroy_workqueue(cma_wq); +} + +module_init(cma_init); +module_exit(cma_cleanup); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_cma.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_device.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_device.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_device.c (revision 320592) @@ -0,0 +1,1048 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "core_priv.h" + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("core kernel InfiniBand API"); +MODULE_LICENSE("Dual BSD/GPL"); + +struct ib_client_data { + struct list_head list; + struct ib_client *client; + void * data; + /* The device or client is going down. Do not call client or device + * callbacks other than remove(). */ + bool going_down; +}; + +struct workqueue_struct *ib_comp_wq; +struct workqueue_struct *ib_wq; +EXPORT_SYMBOL_GPL(ib_wq); + +/* The device_list and client_list contain devices and clients after their + * registration has completed, and the devices and clients are removed + * during unregistration. */ +static LIST_HEAD(device_list); +static LIST_HEAD(client_list); + +/* + * device_mutex and lists_rwsem protect access to both device_list and + * client_list. device_mutex protects writer access by device and client + * registration / de-registration. lists_rwsem protects reader access to + * these lists. Iterators of these lists must lock it for read, while updates + * to the lists must be done with a write lock. 
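(A reader typically brackets its walk of these lists with down_read(&lists_rwsem) and up_read(&lists_rwsem), exactly as ib_enum_all_roce_netdevs() below does.)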
A special case is when the + * device_mutex is locked. In this case locking the lists for read access is + * not necessary as the device_mutex implies it. + * + * lists_rwsem also protects access to the client data list. + */ +static DEFINE_MUTEX(device_mutex); +static DECLARE_RWSEM(lists_rwsem); + + +static int ib_device_check_mandatory(struct ib_device *device) +{ +#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x } + static const struct { + size_t offset; + char *name; + } mandatory_table[] = { + IB_MANDATORY_FUNC(query_device), + IB_MANDATORY_FUNC(query_port), + IB_MANDATORY_FUNC(query_pkey), + IB_MANDATORY_FUNC(query_gid), + IB_MANDATORY_FUNC(alloc_pd), + IB_MANDATORY_FUNC(dealloc_pd), + IB_MANDATORY_FUNC(create_ah), + IB_MANDATORY_FUNC(destroy_ah), + IB_MANDATORY_FUNC(create_qp), + IB_MANDATORY_FUNC(modify_qp), + IB_MANDATORY_FUNC(destroy_qp), + IB_MANDATORY_FUNC(post_send), + IB_MANDATORY_FUNC(post_recv), + IB_MANDATORY_FUNC(create_cq), + IB_MANDATORY_FUNC(destroy_cq), + IB_MANDATORY_FUNC(poll_cq), + IB_MANDATORY_FUNC(req_notify_cq), + IB_MANDATORY_FUNC(get_dma_mr), + IB_MANDATORY_FUNC(dereg_mr), + IB_MANDATORY_FUNC(get_port_immutable) + }; + int i; + + for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { + if (!*(void **) ((char *) device + mandatory_table[i].offset)) { + pr_warn("Device %s is missing mandatory function %s\n", + device->name, mandatory_table[i].name); + return -EINVAL; + } + } + + return 0; +} + +static struct ib_device *__ib_device_get_by_name(const char *name) +{ + struct ib_device *device; + + list_for_each_entry(device, &device_list, core_list) + if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX)) + return device; + + return NULL; +} + + +static int alloc_name(char *name) +{ + unsigned long *inuse; + char buf[IB_DEVICE_NAME_MAX]; + struct ib_device *device; + int i; + + inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!inuse) + return -ENOMEM; + + list_for_each_entry(device, &device_list, core_list) { + if (!sscanf(device->name, name, &i)) + continue; + if (i < 0 || i >= PAGE_SIZE * 8) + continue; + snprintf(buf, sizeof buf, name, i); + if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX)) + set_bit(i, inuse); + } + + i = find_first_zero_bit(inuse, PAGE_SIZE * 8); + free_page((unsigned long) inuse); + snprintf(buf, sizeof buf, name, i); + + if (__ib_device_get_by_name(buf)) + return -ENFILE; + + strlcpy(name, buf, IB_DEVICE_NAME_MAX); + return 0; +} + +static void ib_device_release(struct device *device) +{ + struct ib_device *dev = container_of(device, struct ib_device, dev); + + ib_cache_release_one(dev); + kfree(dev->port_immutable); + kfree(dev); +} + +static struct class ib_class = { + .name = "infiniband", + .dev_release = ib_device_release, +}; + +/** + * ib_alloc_device - allocate an IB device struct + * @size:size of structure to allocate + * + * Low-level drivers should use ib_alloc_device() to allocate &struct + * ib_device. @size is the size of the structure to be allocated, + * including any private data used by the low-level driver. + * ib_dealloc_device() must be used to free structures allocated with + * ib_alloc_device(). 
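+ *
+ * A minimal usage sketch (the container structure and its layout are
+ * illustrative assumptions, not part of this file):
+ *
+ *	struct my_hca {
+ *		struct ib_device ib_dev;	(must be the first member
+ *						 for this cast to be valid)
+ *		int		 my_state;
+ *	};
+ *
+ *	struct my_hca *hca = (struct my_hca *)
+ *		ib_alloc_device(sizeof(struct my_hca));
+ *	if (!hca)
+ *		return -ENOMEM;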
+ */ +struct ib_device *ib_alloc_device(size_t size) +{ + struct ib_device *device; + + if (WARN_ON(size < sizeof(struct ib_device))) + return NULL; + + device = kzalloc(size, GFP_KERNEL); + if (!device) + return NULL; + + device->dev.parent = &linux_root_device; + device->dev.class = &ib_class; + device_initialize(&device->dev); + + dev_set_drvdata(&device->dev, device); + + INIT_LIST_HEAD(&device->event_handler_list); + spin_lock_init(&device->event_handler_lock); + spin_lock_init(&device->client_data_lock); + INIT_LIST_HEAD(&device->client_data_list); + INIT_LIST_HEAD(&device->port_list); + + return device; +} +EXPORT_SYMBOL(ib_alloc_device); + +/** + * ib_dealloc_device - free an IB device struct + * @device:structure to free + * + * Free a structure allocated with ib_alloc_device(). + */ +void ib_dealloc_device(struct ib_device *device) +{ + WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && + device->reg_state != IB_DEV_UNINITIALIZED); + kobject_put(&device->dev.kobj); +} +EXPORT_SYMBOL(ib_dealloc_device); + +static int add_client_context(struct ib_device *device, struct ib_client *client) +{ + struct ib_client_data *context; + unsigned long flags; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) { + pr_warn("Couldn't allocate client context for %s/%s\n", + device->name, client->name); + return -ENOMEM; + } + + context->client = client; + context->data = NULL; + context->going_down = false; + + down_write(&lists_rwsem); + spin_lock_irqsave(&device->client_data_lock, flags); + list_add(&context->list, &device->client_data_list); + spin_unlock_irqrestore(&device->client_data_lock, flags); + up_write(&lists_rwsem); + + return 0; +} + +static int verify_immutable(const struct ib_device *dev, u8 port) +{ + return WARN_ON(!rdma_cap_ib_mad(dev, port) && + rdma_max_mad_size(dev, port) != 0); +} + +static int read_port_immutable(struct ib_device *device) +{ + int ret; + u8 start_port = rdma_start_port(device); + u8 end_port = rdma_end_port(device); + u8 port; + + /** + * device->port_immutable is indexed directly by the port number to make + * access to this data as efficient as possible. + * + * Therefore port_immutable is declared as a 1 based array with + * potential empty slots at the beginning. + */ + device->port_immutable = kzalloc(sizeof(*device->port_immutable) + * (end_port + 1), + GFP_KERNEL); + if (!device->port_immutable) + return -ENOMEM; + + for (port = start_port; port <= end_port; ++port) { + ret = device->get_port_immutable(device, port, + &device->port_immutable[port]); + if (ret) + return ret; + + if (verify_immutable(device, port)) + return -EINVAL; + } + return 0; +} + +void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len) +{ + if (dev->get_dev_fw_str) + dev->get_dev_fw_str(dev, str, str_len); + else + str[0] = '\0'; +} +EXPORT_SYMBOL(ib_get_device_fw_str); + +/** + * ib_register_device - Register an IB device with IB core + * @device:Device to register + * + * Low-level drivers use ib_register_device() to register their + * devices with the IB core. All registered clients will receive a + * callback for each device that is added. @device must be allocated + * with ib_alloc_device(). 
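+ *
+ * A registration sketch, continuing the hypothetical hca example from
+ * ib_alloc_device() above (method names are placeholders; a '%d' in the
+ * name asks the core to pick a free unit number):
+ *
+ *	strlcpy(hca->ib_dev.name, "myhca%d", IB_DEVICE_NAME_MAX);
+ *	hca->ib_dev.query_device = my_query_device;
+ *	... (every method checked by ib_device_check_mandatory()) ...
+ *	ret = ib_register_device(&hca->ib_dev, NULL);
+ *	if (ret)
+ *		goto err_dealloc;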
+ */ +int ib_register_device(struct ib_device *device, + int (*port_callback)(struct ib_device *, + u8, struct kobject *)) +{ + int ret; + struct ib_client *client; + struct ib_udata uhw = {.outlen = 0, .inlen = 0}; + + mutex_lock(&device_mutex); + + if (strchr(device->name, '%')) { + ret = alloc_name(device->name); + if (ret) + goto out; + } + + if (ib_device_check_mandatory(device)) { + ret = -EINVAL; + goto out; + } + + ret = read_port_immutable(device); + if (ret) { + pr_warn("Couldn't create per port immutable data %s\n", + device->name); + goto out; + } + + ret = ib_cache_setup_one(device); + if (ret) { + pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n"); + goto out; + } + + memset(&device->attrs, 0, sizeof(device->attrs)); + ret = device->query_device(device, &device->attrs, &uhw); + if (ret) { + pr_warn("Couldn't query the device attributes\n"); + ib_cache_cleanup_one(device); + goto out; + } + + ret = ib_device_register_sysfs(device, port_callback); + if (ret) { + pr_warn("Couldn't register device %s with driver model\n", + device->name); + ib_cache_cleanup_one(device); + goto out; + } + + device->reg_state = IB_DEV_REGISTERED; + + list_for_each_entry(client, &client_list, list) + if (client->add && !add_client_context(device, client)) + client->add(device); + + down_write(&lists_rwsem); + list_add_tail(&device->core_list, &device_list); + up_write(&lists_rwsem); +out: + mutex_unlock(&device_mutex); + return ret; +} +EXPORT_SYMBOL(ib_register_device); + +/** + * ib_unregister_device - Unregister an IB device + * @device:Device to unregister + * + * Unregister an IB device. All clients will receive a remove callback. + */ +void ib_unregister_device(struct ib_device *device) +{ + struct ib_client_data *context, *tmp; + unsigned long flags; + + mutex_lock(&device_mutex); + + down_write(&lists_rwsem); + list_del(&device->core_list); + spin_lock_irqsave(&device->client_data_lock, flags); + list_for_each_entry_safe(context, tmp, &device->client_data_list, list) + context->going_down = true; + spin_unlock_irqrestore(&device->client_data_lock, flags); + downgrade_write(&lists_rwsem); + + list_for_each_entry_safe(context, tmp, &device->client_data_list, + list) { + if (context->client->remove) + context->client->remove(device, context->data); + } + up_read(&lists_rwsem); + + mutex_unlock(&device_mutex); + + ib_device_unregister_sysfs(device); + ib_cache_cleanup_one(device); + + down_write(&lists_rwsem); + spin_lock_irqsave(&device->client_data_lock, flags); + list_for_each_entry_safe(context, tmp, &device->client_data_list, list) + kfree(context); + spin_unlock_irqrestore(&device->client_data_lock, flags); + up_write(&lists_rwsem); + + device->reg_state = IB_DEV_UNREGISTERED; +} +EXPORT_SYMBOL(ib_unregister_device); + +/** + * ib_register_client - Register an IB client + * @client:Client to register + * + * Upper level users of the IB drivers can use ib_register_client() to + * register callbacks for IB device addition and removal. When an IB + * device is added, each registered client's add method will be called + * (in the order the clients were registered), and when a device is + * removed, each client's remove method will be called (in the reverse + * order that clients were registered). In addition, when + * ib_register_client() is called, the client will receive an add + * callback for all devices already registered. 
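+ *
+ * A minimal client sketch (names are placeholders):
+ *
+ *	static void my_add_one(struct ib_device *device);
+ *	static void my_remove_one(struct ib_device *device, void *client_data);
+ *
+ *	static struct ib_client my_client = {
+ *		.name	= "my_client",
+ *		.add	= my_add_one,
+ *		.remove	= my_remove_one
+ *	};
+ *
+ *	ret = ib_register_client(&my_client);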
+ */ +int ib_register_client(struct ib_client *client) +{ + struct ib_device *device; + + mutex_lock(&device_mutex); + + list_for_each_entry(device, &device_list, core_list) + if (client->add && !add_client_context(device, client)) + client->add(device); + + down_write(&lists_rwsem); + list_add_tail(&client->list, &client_list); + up_write(&lists_rwsem); + + mutex_unlock(&device_mutex); + + return 0; +} +EXPORT_SYMBOL(ib_register_client); + +/** + * ib_unregister_client - Unregister an IB client + * @client:Client to unregister + * + * Upper level users use ib_unregister_client() to remove their client + * registration. When ib_unregister_client() is called, the client + * will receive a remove callback for each IB device still registered. + */ +void ib_unregister_client(struct ib_client *client) +{ + struct ib_client_data *context, *tmp; + struct ib_device *device; + unsigned long flags; + + mutex_lock(&device_mutex); + + down_write(&lists_rwsem); + list_del(&client->list); + up_write(&lists_rwsem); + + list_for_each_entry(device, &device_list, core_list) { + struct ib_client_data *found_context = NULL; + + down_write(&lists_rwsem); + spin_lock_irqsave(&device->client_data_lock, flags); + list_for_each_entry_safe(context, tmp, &device->client_data_list, list) + if (context->client == client) { + context->going_down = true; + found_context = context; + break; + } + spin_unlock_irqrestore(&device->client_data_lock, flags); + up_write(&lists_rwsem); + + if (client->remove) + client->remove(device, found_context ? + found_context->data : NULL); + + if (!found_context) { + pr_warn("No client context found for %s/%s\n", + device->name, client->name); + continue; + } + + down_write(&lists_rwsem); + spin_lock_irqsave(&device->client_data_lock, flags); + list_del(&found_context->list); + kfree(found_context); + spin_unlock_irqrestore(&device->client_data_lock, flags); + up_write(&lists_rwsem); + } + + mutex_unlock(&device_mutex); +} +EXPORT_SYMBOL(ib_unregister_client); + +/** + * ib_get_client_data - Get IB client context + * @device:Device to get context for + * @client:Client to get context for + * + * ib_get_client_data() returns client context set with + * ib_set_client_data(). + */ +void *ib_get_client_data(struct ib_device *device, struct ib_client *client) +{ + struct ib_client_data *context; + void *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&device->client_data_lock, flags); + list_for_each_entry(context, &device->client_data_list, list) + if (context->client == client) { + ret = context->data; + break; + } + spin_unlock_irqrestore(&device->client_data_lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_get_client_data); + +/** + * ib_set_client_data - Set IB client context + * @device:Device to set context for + * @client:Client to set context for + * @data:Context to set + * + * ib_set_client_data() sets client context that can be retrieved with + * ib_get_client_data(). 
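+ *
+ * The two calls are normally paired inside a client's add and remove
+ * callbacks (sketch; my_client and struct my_data are assumed):
+ *
+ *	static void my_add_one(struct ib_device *device)
+ *	{
+ *		struct my_data *data = kzalloc(sizeof(*data), GFP_KERNEL);
+ *
+ *		if (data)
+ *			ib_set_client_data(device, &my_client, data);
+ *	}
+ *
+ *	static void my_remove_one(struct ib_device *device, void *client_data)
+ *	{
+ *		kfree(client_data);
+ *	}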
+ */ +void ib_set_client_data(struct ib_device *device, struct ib_client *client, + void *data) +{ + struct ib_client_data *context; + unsigned long flags; + + spin_lock_irqsave(&device->client_data_lock, flags); + list_for_each_entry(context, &device->client_data_list, list) + if (context->client == client) { + context->data = data; + goto out; + } + + pr_warn("No client context found for %s/%s\n", + device->name, client->name); + +out: + spin_unlock_irqrestore(&device->client_data_lock, flags); +} +EXPORT_SYMBOL(ib_set_client_data); + +/** + * ib_register_event_handler - Register an IB event handler + * @event_handler:Handler to register + * + * ib_register_event_handler() registers an event handler that will be + * called back when asynchronous IB events occur (as defined in + * chapter 11 of the InfiniBand Architecture Specification). This + * callback may occur in interrupt context. + */ +int ib_register_event_handler (struct ib_event_handler *event_handler) +{ + unsigned long flags; + + spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + list_add_tail(&event_handler->list, + &event_handler->device->event_handler_list); + spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + + return 0; +} +EXPORT_SYMBOL(ib_register_event_handler); + +/** + * ib_unregister_event_handler - Unregister an event handler + * @event_handler:Handler to unregister + * + * Unregister an event handler registered with + * ib_register_event_handler(). + */ +int ib_unregister_event_handler(struct ib_event_handler *event_handler) +{ + unsigned long flags; + + spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + list_del(&event_handler->list); + spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + + return 0; +} +EXPORT_SYMBOL(ib_unregister_event_handler); + +/** + * ib_dispatch_event - Dispatch an asynchronous event + * @event:Event to dispatch + * + * Low-level drivers must call ib_dispatch_event() to dispatch the + * event to all registered event handlers when an asynchronous event + * occurs. + */ +void ib_dispatch_event(struct ib_event *event) +{ + unsigned long flags; + struct ib_event_handler *handler; + + spin_lock_irqsave(&event->device->event_handler_lock, flags); + + list_for_each_entry(handler, &event->device->event_handler_list, list) + handler->handler(handler, event); + + spin_unlock_irqrestore(&event->device->event_handler_lock, flags); +} +EXPORT_SYMBOL(ib_dispatch_event); + +/** + * ib_query_port - Query IB port attributes + * @device:Device to query + * @port_num:Port number to query + * @port_attr:Port attributes + * + * ib_query_port() returns the attributes of a port through the + * @port_attr pointer. 
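+ *
+ * For example, a caller probing whether a port is usable might do
+ * (sketch):
+ *
+ *	struct ib_port_attr attr;
+ *
+ *	if (!ib_query_port(device, port_num, &attr) &&
+ *	    attr.state == IB_PORT_ACTIVE)
+ *		... the port is up and its attributes are in attr ...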
+ */
+int ib_query_port(struct ib_device *device,
+		  u8 port_num,
+		  struct ib_port_attr *port_attr)
+{
+	union ib_gid gid;
+	int err;
+
+	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+		return -EINVAL;
+
+	memset(port_attr, 0, sizeof(*port_attr));
+	err = device->query_port(device, port_num, port_attr);
+	if (err || port_attr->subnet_prefix)
+		return err;
+
+	if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
+		return 0;
+
+	err = ib_query_gid(device, port_num, 0, &gid, NULL);
+	if (err)
+		return err;
+
+	port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
+	return 0;
+}
+EXPORT_SYMBOL(ib_query_port);
+
+/**
+ * ib_query_gid - Get GID table entry
+ * @device:Device to query
+ * @port_num:Port number to query
+ * @index:GID table index to query
+ * @gid:Returned GID
+ * @attr: Returned GID attributes related to this GID index (only in RoCE).
+ *   NULL means ignore.
+ *
+ * ib_query_gid() fetches the specified GID table entry.
+ */
+int ib_query_gid(struct ib_device *device,
+		 u8 port_num, int index, union ib_gid *gid,
+		 struct ib_gid_attr *attr)
+{
+	if (rdma_cap_roce_gid_table(device, port_num))
+		return ib_get_cached_gid(device, port_num, index, gid, attr);
+
+	if (attr)
+		return -EINVAL;
+
+	return device->query_gid(device, port_num, index, gid);
+}
+EXPORT_SYMBOL(ib_query_gid);
+
+/**
+ * ib_enum_roce_netdev - enumerate all RoCE ports
+ * @ib_dev : IB device we want to query
+ * @filter: Should we call the callback?
+ * @filter_cookie: Cookie passed to filter
+ * @cb: Callback to call for each RoCE port found
+ * @cookie: Cookie passed back to the callback
+ *
+ * Enumerates all of the physical RoCE ports of ib_dev
+ * that are related to a netdevice and calls callback() on each
+ * port for which the filter() function returns non-zero.
+ */
+void ib_enum_roce_netdev(struct ib_device *ib_dev,
+			 roce_netdev_filter filter,
+			 void *filter_cookie,
+			 roce_netdev_callback cb,
+			 void *cookie)
+{
+	u8 port;
+
+	for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev);
+	     port++)
+		if (rdma_protocol_roce(ib_dev, port)) {
+			struct net_device *idev = NULL;
+
+			if (ib_dev->get_netdev)
+				idev = ib_dev->get_netdev(ib_dev, port);
+
+			if (idev && (idev->if_flags & IFF_DYING)) {
+				dev_put(idev);
+				idev = NULL;
+			}
+
+			if (filter(ib_dev, port, idev, filter_cookie))
+				cb(ib_dev, port, idev, cookie);
+
+			if (idev)
+				dev_put(idev);
+		}
+}
+
+/**
+ * ib_enum_all_roce_netdevs - enumerate all RoCE devices
+ * @filter: Should we call the callback?
+ * @filter_cookie: Cookie passed to filter
+ * @cb: Callback to call for each RoCE port found
+ * @cookie: Cookie passed back to the callback
+ *
+ * Enumerates the physical ports of all RoCE devices that are related
+ * to netdevices and calls callback() on each port for which the
+ * filter() function returns non-zero.
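+ *
+ * A filter/callback sketch (hypothetical; the cookies carry whatever
+ * state the caller needs):
+ *
+ *	static int my_filter(struct ib_device *ib_dev, u8 port,
+ *			     struct net_device *idev, void *cookie)
+ *	{
+ *		return idev == (struct net_device *)cookie;
+ *	}
+ *
+ *	static void my_cb(struct ib_device *ib_dev, u8 port,
+ *			  struct net_device *idev, void *cookie)
+ *	{
+ *		... act on this (ib_dev, port, idev) match ...
+ *	}
+ *
+ *	ib_enum_all_roce_netdevs(my_filter, ndev, my_cb, NULL);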
+ */
+void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
+			      void *filter_cookie,
+			      roce_netdev_callback cb,
+			      void *cookie)
+{
+	struct ib_device *dev;
+
+	down_read(&lists_rwsem);
+	list_for_each_entry(dev, &device_list, core_list)
+		ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
+	up_read(&lists_rwsem);
+}
+
+/**
+ * ib_cache_gid_del_all_by_netdev - delete GIDs belonging to a netdevice
+ *
+ * @ndev: Pointer to netdevice
+ */
+void ib_cache_gid_del_all_by_netdev(struct net_device *ndev)
+{
+	struct ib_device *ib_dev;
+	u8 port;
+
+	down_read(&lists_rwsem);
+	list_for_each_entry(ib_dev, &device_list, core_list) {
+		for (port = rdma_start_port(ib_dev);
+		     port <= rdma_end_port(ib_dev);
+		     port++) {
+			if (rdma_protocol_roce(ib_dev, port) == 0)
+				continue;
+			(void) ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev);
+		}
+	}
+	up_read(&lists_rwsem);
+}
+
+/**
+ * ib_query_pkey - Get P_Key table entry
+ * @device:Device to query
+ * @port_num:Port number to query
+ * @index:P_Key table index to query
+ * @pkey:Returned P_Key
+ *
+ * ib_query_pkey() fetches the specified P_Key table entry.
+ */
+int ib_query_pkey(struct ib_device *device,
+		  u8 port_num, u16 index, u16 *pkey)
+{
+	return device->query_pkey(device, port_num, index, pkey);
+}
+EXPORT_SYMBOL(ib_query_pkey);
+
+/**
+ * ib_modify_device - Change IB device attributes
+ * @device:Device to modify
+ * @device_modify_mask:Mask of attributes to change
+ * @device_modify:New attribute values
+ *
+ * ib_modify_device() changes a device's attributes as specified by
+ * the @device_modify_mask and @device_modify structure.
+ */
+int ib_modify_device(struct ib_device *device,
+		     int device_modify_mask,
+		     struct ib_device_modify *device_modify)
+{
+	if (!device->modify_device)
+		return -ENOSYS;
+
+	return device->modify_device(device, device_modify_mask,
+				     device_modify);
+}
+EXPORT_SYMBOL(ib_modify_device);
+
+/**
+ * ib_modify_port - Modifies the attributes for the specified port.
+ * @device: The device to modify.
+ * @port_num: The number of the port to modify.
+ * @port_modify_mask: Mask used to specify which attributes of the port
+ *   to change.
+ * @port_modify: New attribute values for the port.
+ *
+ * ib_modify_port() changes a port's attributes as specified by the
+ * @port_modify_mask and @port_modify structure.
+ */
+int ib_modify_port(struct ib_device *device,
+		   u8 port_num, int port_modify_mask,
+		   struct ib_port_modify *port_modify)
+{
+	if (!device->modify_port)
+		return -ENOSYS;
+
+	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+		return -EINVAL;
+
+	return device->modify_port(device, port_num, port_modify_mask,
+				   port_modify);
+}
+EXPORT_SYMBOL(ib_modify_port);
+
+/**
+ * ib_find_gid - Returns the port number and GID table index where
+ *   a specified GID value occurs.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @gid_type: Type of GID.
+ * @ndev: The ndev related to the GID to search for.
+ * @port_num: The port number of the device where the GID value was found.
+ * @index: The index into the GID table where the GID was found. This
+ *   parameter may be NULL.
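+ *
+ * Example (sketch): find which port owns a known GID:
+ *
+ *	u8 port;
+ *	u16 index;
+ *
+ *	if (!ib_find_gid(device, &gid, IB_GID_TYPE_IB, NULL, &port, &index))
+ *		... gid is GID table entry index on port ...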
+ */
+int ib_find_gid(struct ib_device *device, union ib_gid *gid,
+		enum ib_gid_type gid_type, struct net_device *ndev,
+		u8 *port_num, u16 *index)
+{
+	union ib_gid tmp_gid;
+	int ret, port, i;
+
+	for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
+		if (rdma_cap_roce_gid_table(device, port)) {
+			if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
+							ndev, index)) {
+				*port_num = port;
+				return 0;
+			}
+		}
+
+		if (gid_type != IB_GID_TYPE_IB)
+			continue;
+
+		for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
+			ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
+			if (ret)
+				return ret;
+			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
+				*port_num = port;
+				if (index)
+					*index = i;
+				return 0;
+			}
+		}
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL(ib_find_gid);
+
+/**
+ * ib_find_pkey - Returns the PKey table index where a specified
+ *   PKey value occurs.
+ * @device: The device to query.
+ * @port_num: The port number of the device to search for the PKey.
+ * @pkey: The PKey value to search for.
+ * @index: The index into the PKey table where the PKey was found.
+ */
+int ib_find_pkey(struct ib_device *device,
+		 u8 port_num, u16 pkey, u16 *index)
+{
+	int ret, i;
+	u16 tmp_pkey;
+	int partial_ix = -1;
+
+	for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) {
+		ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
+		if (ret)
+			return ret;
+		if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
+			/* If there is a full-member pkey, take it. */
+			if (tmp_pkey & 0x8000) {
+				*index = i;
+				return 0;
+			}
+			if (partial_ix < 0)
+				partial_ix = i;
+		}
+	}
+
+	/* No full-member pkey was found; take the limited-member one if it exists. */
+	if (partial_ix >= 0) {
+		*index = partial_ix;
+		return 0;
+	}
+	return -ENOENT;
+}
+EXPORT_SYMBOL(ib_find_pkey);
+
+/**
+ * ib_get_net_dev_by_params() - Return the appropriate net_dev
+ * for a received CM request
+ * @dev: An RDMA device on which the request has been received.
+ * @port: Port number on the RDMA device.
+ * @pkey: The Pkey the request came on.
+ * @gid: A GID that the net_dev uses to communicate.
+ * @addr: Contains the IP address that the request specified as its
+ *   destination.
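+ *
+ * Walks this device's registered clients and returns the first net_dev
+ * that a client's get_net_dev_by_params() callback claims for these
+ * parameters, or NULL if no client claims one.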
+ */
+struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
+					    u8 port,
+					    u16 pkey,
+					    const union ib_gid *gid,
+					    const struct sockaddr *addr)
+{
+	struct net_device *net_dev = NULL;
+	struct ib_client_data *context;
+
+	if (!rdma_protocol_ib(dev, port))
+		return NULL;
+
+	down_read(&lists_rwsem);
+
+	list_for_each_entry(context, &dev->client_data_list, list) {
+		struct ib_client *client = context->client;
+
+		if (context->going_down)
+			continue;
+
+		if (client->get_net_dev_by_params) {
+			net_dev = client->get_net_dev_by_params(dev, port, pkey,
+								gid, addr,
+								context->data);
+			if (net_dev)
+				break;
+		}
+	}
+
+	up_read(&lists_rwsem);
+
+	return net_dev;
+}
+EXPORT_SYMBOL(ib_get_net_dev_by_params);
+
+static int __init ib_core_init(void)
+{
+	int ret;
+
+	ib_wq = alloc_workqueue("infiniband", 0, 0);
+	if (!ib_wq)
+		return -ENOMEM;
+
+	ib_comp_wq = alloc_workqueue("ib-comp-wq",
+			WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
+			mp_ncpus * 4 /* WQ_UNBOUND_MAX_ACTIVE */);
+	if (!ib_comp_wq) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = class_register(&ib_class);
+	if (ret) {
+		pr_warn("Couldn't create InfiniBand device class\n");
+		goto err_comp;
+	}
+
+	ret = addr_init();
+	if (ret) {
+		pr_warn("Couldn't init IB address resolution\n");
+		goto err_sysfs;
+	}
+
+	ret = ib_mad_init();
+	if (ret) {
+		pr_warn("Couldn't init IB MAD\n");
+		goto err_addr;
+	}
+
+	ret = ib_sa_init();
+	if (ret) {
+		pr_warn("Couldn't init SA\n");
+		goto err_mad;
+	}
+
+	ib_cache_setup();
+
+	return 0;
+
+err_mad:
+	ib_mad_cleanup();
+err_addr:
+	addr_cleanup();
+err_sysfs:
+	class_unregister(&ib_class);
+err_comp:
+	destroy_workqueue(ib_comp_wq);
+err:
+	destroy_workqueue(ib_wq);
+	return ret;
+}
+
+static void __exit ib_core_cleanup(void)
+{
+	ib_cache_cleanup();
+	ib_sa_cleanup();
+	ib_mad_cleanup();
+	addr_cleanup();
+	class_unregister(&ib_class);
+	destroy_workqueue(ib_comp_wq);
+	/* Make sure that any pending umem accounting work is done. */
+	destroy_workqueue(ib_wq);
+}
+
+module_init(ib_core_init);
+module_exit(ib_core_cleanup);
+
+MODULE_VERSION(ibcore, 1);
+MODULE_DEPEND(ibcore, linuxkpi, 1, 1, 1);

Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_device.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_fmr_pool.c
===================================================================
--- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_fmr_pool.c	(nonexistent)
+++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_fmr_pool.c	(revision 320592)
@@ -0,0 +1,520 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include + +#include "core_priv.h" + +#define PFX "fmr_pool: " + +enum { + IB_FMR_MAX_REMAPS = 32, + + IB_FMR_HASH_BITS = 8, + IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS, + IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1 +}; + +/* + * If an FMR is not in use, then the list member will point to either + * its pool's free_list (if the FMR can be mapped again; that is, + * remap_count < pool->max_remaps) or its pool's dirty_list (if the + * FMR needs to be unmapped before being remapped). In either of + * these cases it is a bug if the ref_count is not 0. In other words, + * if ref_count is > 0, then the list member must not be linked into + * either free_list or dirty_list. + * + * The cache_node member is used to link the FMR into a cache bucket + * (if caching is enabled). This is independent of the reference + * count of the FMR. When a valid FMR is released, its ref_count is + * decremented, and if ref_count reaches 0, the FMR is placed in + * either free_list or dirty_list as appropriate. However, it is not + * removed from the cache and may be "revived" if a call to + * ib_fmr_register_physical() occurs before the FMR is remapped. In + * this case we just increment the ref_count and remove the FMR from + * free_list/dirty_list. + * + * Before we remap an FMR from free_list, we remove it from the cache + * (to prevent another user from obtaining a stale FMR). When an FMR + * is released, we add it to the tail of the free list, so that our + * cache eviction policy is "least recently used." + * + * All manipulation of ref_count, list and cache_node is protected by + * pool_lock to maintain consistency. 
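The comment above describes a per-bucket cache keyed on the first page of a mapping, with LRU eviction falling out of releasing to the tail of free_list. A rough userspace sketch of the bucket selection only, assuming jhash_2words() (kernel-only) is replaced by a 64-bit multiplicative mix purely for illustration:

#include <stdint.h>

#define FMR_HASH_BITS 8
#define FMR_HASH_SIZE (1u << FMR_HASH_BITS)

/*
 * The first page address of a mapping picks the hash bucket, so a
 * lookup for the same page list always lands in the same chain.
 * Fibonacci hashing stands in for the kernel's jhash_2words().
 */
static uint32_t fmr_hash(uint64_t first_page)
{
	uint64_t h = first_page * 0x9e3779b97f4a7c15ULL;

	return (uint32_t)(h >> 32) & (FMR_HASH_SIZE - 1);
}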
+ */ + +struct ib_fmr_pool { + spinlock_t pool_lock; + + int pool_size; + int max_pages; + int max_remaps; + int dirty_watermark; + int dirty_len; + struct list_head free_list; + struct list_head dirty_list; + struct hlist_head *cache_bucket; + + void (*flush_function)(struct ib_fmr_pool *pool, + void *arg); + void *flush_arg; + + struct task_struct *thread; + + atomic_t req_ser; + atomic_t flush_ser; + + wait_queue_head_t force_wait; +}; + +static inline u32 ib_fmr_hash(u64 first_page) +{ + return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) & + (IB_FMR_HASH_SIZE - 1); +} + +/* Caller must hold pool_lock */ +static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, + u64 *page_list, + int page_list_len, + u64 io_virtual_address) +{ + struct hlist_head *bucket; + struct ib_pool_fmr *fmr; + + if (!pool->cache_bucket) + return NULL; + + bucket = pool->cache_bucket + ib_fmr_hash(*page_list); + + hlist_for_each_entry(fmr, bucket, cache_node) + if (io_virtual_address == fmr->io_virtual_address && + page_list_len == fmr->page_list_len && + !memcmp(page_list, fmr->page_list, + page_list_len * sizeof *page_list)) + return fmr; + + return NULL; +} + +static void ib_fmr_batch_release(struct ib_fmr_pool *pool) +{ + int ret; + struct ib_pool_fmr *fmr; + LIST_HEAD(unmap_list); + LIST_HEAD(fmr_list); + + spin_lock_irq(&pool->pool_lock); + + list_for_each_entry(fmr, &pool->dirty_list, list) { + hlist_del_init(&fmr->cache_node); + fmr->remap_count = 0; + list_add_tail(&fmr->fmr->list, &fmr_list); + +#ifdef DEBUG + if (fmr->ref_count != 0) { + pr_warn(PFX "Unmapping FMR %p with ref count %d\n", + fmr, fmr->ref_count); + } +#endif + } + + list_splice_init(&pool->dirty_list, &unmap_list); + pool->dirty_len = 0; + + spin_unlock_irq(&pool->pool_lock); + + if (list_empty(&unmap_list)) { + return; + } + + ret = ib_unmap_fmr(&fmr_list); + if (ret) + pr_warn(PFX "ib_unmap_fmr returned %d\n", ret); + + spin_lock_irq(&pool->pool_lock); + list_splice(&unmap_list, &pool->free_list); + spin_unlock_irq(&pool->pool_lock); +} + +static int ib_fmr_cleanup_thread(void *pool_ptr) +{ + struct ib_fmr_pool *pool = pool_ptr; + + do { + if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) { + ib_fmr_batch_release(pool); + + atomic_inc(&pool->flush_ser); + wake_up_interruptible(&pool->force_wait); + + if (pool->flush_function) + pool->flush_function(pool, pool->flush_arg); + } + + set_current_state(TASK_INTERRUPTIBLE); + if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 && + !kthread_should_stop()) + schedule(); + __set_current_state(TASK_RUNNING); + } while (!kthread_should_stop()); + + return 0; +} + +/** + * ib_create_fmr_pool - Create an FMR pool + * @pd:Protection domain for FMRs + * @params:FMR pool parameters + * + * Create a pool of FMRs. Return value is pointer to new pool or + * error code if creation failed.
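The cleanup thread above coordinates with requesters through two serial counters: req_ser counts flush requests, flush_ser counts completed batches, and all comparisons use a signed difference so counter wraparound is harmless. A condensed userspace sketch of that handshake using C11 atomics (flush_ctl and the function names are illustrative, not driver APIs):

#include <stdatomic.h>

struct flush_ctl {
	atomic_int req_ser;	/* bumped by each flush requester */
	atomic_int flush_ser;	/* bumped by the worker per completed batch */
};

/* requester side: take a serial, then wait until flush_ser catches up */
static int request_flush(struct flush_ctl *c)
{
	return atomic_fetch_add(&c->req_ser, 1) + 1;
}

/* worker side: work remains while flush_ser lags req_ser */
static int worker_should_flush(struct flush_ctl *c)
{
	return atomic_load(&c->flush_ser) - atomic_load(&c->req_ser) < 0;
}

/* waiter predicate, wraparound-tolerant like the driver's check */
static int flush_done(struct flush_ctl *c, int my_serial)
{
	return atomic_load(&c->flush_ser) - my_serial >= 0;
}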
+ */ +struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, + struct ib_fmr_pool_param *params) +{ + struct ib_device *device; + struct ib_fmr_pool *pool; + int i; + int ret; + int max_remaps; + + if (!params) + return ERR_PTR(-EINVAL); + + device = pd->device; + if (!device->alloc_fmr || !device->dealloc_fmr || + !device->map_phys_fmr || !device->unmap_fmr) { + pr_info(PFX "Device %s does not support FMRs\n", device->name); + return ERR_PTR(-ENOSYS); + } + + if (!device->attrs.max_map_per_fmr) + max_remaps = IB_FMR_MAX_REMAPS; + else + max_remaps = device->attrs.max_map_per_fmr; + + pool = kmalloc(sizeof *pool, GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + + pool->cache_bucket = NULL; + pool->flush_function = params->flush_function; + pool->flush_arg = params->flush_arg; + + INIT_LIST_HEAD(&pool->free_list); + INIT_LIST_HEAD(&pool->dirty_list); + + if (params->cache) { + pool->cache_bucket = + kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, + GFP_KERNEL); + if (!pool->cache_bucket) { + pr_warn(PFX "Failed to allocate cache in pool\n"); + ret = -ENOMEM; + goto out_free_pool; + } + + for (i = 0; i < IB_FMR_HASH_SIZE; ++i) + INIT_HLIST_HEAD(pool->cache_bucket + i); + } + + pool->pool_size = 0; + pool->max_pages = params->max_pages_per_fmr; + pool->max_remaps = max_remaps; + pool->dirty_watermark = params->dirty_watermark; + pool->dirty_len = 0; + spin_lock_init(&pool->pool_lock); + atomic_set(&pool->req_ser, 0); + atomic_set(&pool->flush_ser, 0); + init_waitqueue_head(&pool->force_wait); + + pool->thread = kthread_run(ib_fmr_cleanup_thread, + pool, + "ib_fmr(%s)", + device->name); + if (IS_ERR(pool->thread)) { + pr_warn(PFX "couldn't start cleanup thread\n"); + ret = PTR_ERR(pool->thread); + goto out_free_pool; + } + + { + struct ib_pool_fmr *fmr; + struct ib_fmr_attr fmr_attr = { + .max_pages = params->max_pages_per_fmr, + .max_maps = pool->max_remaps, + .page_shift = params->page_shift + }; + int bytes_per_fmr = sizeof *fmr; + + if (pool->cache_bucket) + bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64); + + for (i = 0; i < params->pool_size; ++i) { + fmr = kmalloc(bytes_per_fmr, GFP_KERNEL); + if (!fmr) + goto out_fail; + + fmr->pool = pool; + fmr->remap_count = 0; + fmr->ref_count = 0; + INIT_HLIST_NODE(&fmr->cache_node); + + fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr); + if (IS_ERR(fmr->fmr)) { + pr_warn(PFX "fmr_create failed for FMR %d\n", + i); + kfree(fmr); + goto out_fail; + } + + list_add_tail(&fmr->list, &pool->free_list); + ++pool->pool_size; + } + } + + return pool; + + out_free_pool: + kfree(pool->cache_bucket); + kfree(pool); + + return ERR_PTR(ret); + + out_fail: + ib_destroy_fmr_pool(pool); + + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL(ib_create_fmr_pool); + +/** + * ib_destroy_fmr_pool - Free FMR pool + * @pool:FMR pool to free + * + * Destroy an FMR pool and free all associated resources. 
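For context, a hypothetical consumer of ib_create_fmr_pool() might look like the following sketch; pd is assumed to be an existing protection domain and every parameter value here is illustrative only:

/*
 * Create a small cached pool: 32 FMRs of up to 64 pages each,
 * flushing once 8 dirty FMRs accumulate.
 */
static struct ib_fmr_pool *make_pool(struct ib_pd *pd)
{
	struct ib_fmr_pool_param params = {
		.max_pages_per_fmr = 64,
		.page_shift        = 12,	/* 4 KB pages */
		.access            = IB_ACCESS_LOCAL_WRITE |
		                     IB_ACCESS_REMOTE_READ,
		.pool_size         = 32,
		.dirty_watermark   = 8,
		.cache             = 1,
		.flush_function    = NULL,	/* no flush notification */
	};

	return ib_create_fmr_pool(pd, &params);	/* ERR_PTR() on failure */
}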
+ */ +void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) +{ + struct ib_pool_fmr *fmr; + struct ib_pool_fmr *tmp; + LIST_HEAD(fmr_list); + int i; + + kthread_stop(pool->thread); + ib_fmr_batch_release(pool); + + i = 0; + list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { + if (fmr->remap_count) { + INIT_LIST_HEAD(&fmr_list); + list_add_tail(&fmr->fmr->list, &fmr_list); + ib_unmap_fmr(&fmr_list); + } + ib_dealloc_fmr(fmr->fmr); + list_del(&fmr->list); + kfree(fmr); + ++i; + } + + if (i < pool->pool_size) + pr_warn(PFX "pool still has %d regions registered\n", + pool->pool_size - i); + + kfree(pool->cache_bucket); + kfree(pool); +} +EXPORT_SYMBOL(ib_destroy_fmr_pool); + +/** + * ib_flush_fmr_pool - Invalidate all unmapped FMRs + * @pool:FMR pool to flush + * + * Ensure that all unmapped FMRs are fully invalidated. + */ +int ib_flush_fmr_pool(struct ib_fmr_pool *pool) +{ + int serial; + struct ib_pool_fmr *fmr, *next; + + /* + * The free_list holds FMRs that may have been used + * but have not been remapped enough times to be dirty. + * Put them on the dirty list now so that the cleanup + * thread will reap them too. + */ + spin_lock_irq(&pool->pool_lock); + list_for_each_entry_safe(fmr, next, &pool->free_list, list) { + if (fmr->remap_count > 0) + list_move(&fmr->list, &pool->dirty_list); + } + spin_unlock_irq(&pool->pool_lock); + + serial = atomic_inc_return(&pool->req_ser); + wake_up_process(pool->thread); + + if (wait_event_interruptible(pool->force_wait, + atomic_read(&pool->flush_ser) - serial >= 0)) + return -EINTR; + + return 0; +} +EXPORT_SYMBOL(ib_flush_fmr_pool); + +/** + * ib_fmr_pool_map_phys - + * @pool:FMR pool to allocate FMR from + * @page_list:List of pages to map + * @list_len:Number of pages in @page_list + * @io_virtual_address:I/O virtual address for new FMR + * + * Map an FMR from an FMR pool. 
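A sketch of the map/use/unmap cycle built from the functions in this file, assuming page_list already holds DMA addresses of pinned pages (map_and_release is an illustrative name, and the actual work-request posting is elided):

static int map_and_release(struct ib_fmr_pool *pool,
			   u64 *page_list, int npages, u64 io_addr)
{
	struct ib_pool_fmr *fmr;

	fmr = ib_fmr_pool_map_phys(pool, page_list, npages, io_addr);
	if (IS_ERR(fmr))
		return PTR_ERR(fmr);	/* -EAGAIN if the pool is exhausted */

	/* ... post work requests using fmr->fmr->rkey ... */

	return ib_fmr_pool_unmap(fmr);	/* mapping may stay valid until reuse */
}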
+ */ +struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, + u64 *page_list, + int list_len, + u64 io_virtual_address) +{ + struct ib_fmr_pool *pool = pool_handle; + struct ib_pool_fmr *fmr; + unsigned long flags; + int result; + + if (list_len < 1 || list_len > pool->max_pages) + return ERR_PTR(-EINVAL); + + spin_lock_irqsave(&pool->pool_lock, flags); + fmr = ib_fmr_cache_lookup(pool, + page_list, + list_len, + io_virtual_address); + if (fmr) { + /* found in cache */ + ++fmr->ref_count; + if (fmr->ref_count == 1) { + list_del(&fmr->list); + } + + spin_unlock_irqrestore(&pool->pool_lock, flags); + + return fmr; + } + + if (list_empty(&pool->free_list)) { + spin_unlock_irqrestore(&pool->pool_lock, flags); + return ERR_PTR(-EAGAIN); + } + + fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list); + list_del(&fmr->list); + hlist_del_init(&fmr->cache_node); + spin_unlock_irqrestore(&pool->pool_lock, flags); + + result = ib_map_phys_fmr(fmr->fmr, page_list, list_len, + io_virtual_address); + + if (result) { + spin_lock_irqsave(&pool->pool_lock, flags); + list_add(&fmr->list, &pool->free_list); + spin_unlock_irqrestore(&pool->pool_lock, flags); + + pr_warn(PFX "fmr_map returns %d\n", result); + + return ERR_PTR(result); + } + + ++fmr->remap_count; + fmr->ref_count = 1; + + if (pool->cache_bucket) { + fmr->io_virtual_address = io_virtual_address; + fmr->page_list_len = list_len; + memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list)); + + spin_lock_irqsave(&pool->pool_lock, flags); + hlist_add_head(&fmr->cache_node, + pool->cache_bucket + ib_fmr_hash(fmr->page_list[0])); + spin_unlock_irqrestore(&pool->pool_lock, flags); + } + + return fmr; +} +EXPORT_SYMBOL(ib_fmr_pool_map_phys); + +/** + * ib_fmr_pool_unmap - Unmap FMR + * @fmr:FMR to unmap + * + * Unmap an FMR. The FMR mapping may remain valid until the FMR is + * reused (or until ib_flush_fmr_pool() is called). + */ +int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) +{ + struct ib_fmr_pool *pool; + unsigned long flags; + + pool = fmr->pool; + + spin_lock_irqsave(&pool->pool_lock, flags); + + --fmr->ref_count; + if (!fmr->ref_count) { + if (fmr->remap_count < pool->max_remaps) { + list_add_tail(&fmr->list, &pool->free_list); + } else { + list_add_tail(&fmr->list, &pool->dirty_list); + if (++pool->dirty_len >= pool->dirty_watermark) { + atomic_inc(&pool->req_ser); + wake_up_process(pool->thread); + } + } + } + +#ifdef DEBUG + if (fmr->ref_count < 0) + pr_warn(PFX "FMR %p has ref count %d < 0\n", + fmr, fmr->ref_count); +#endif + + spin_unlock_irqrestore(&pool->pool_lock, flags); + + return 0; +} +EXPORT_SYMBOL(ib_fmr_pool_unmap); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_fmr_pool.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_iwcm.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_iwcm.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_iwcm.c (revision 320592) @@ -0,0 +1,1050 @@ +/* + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. 
+ * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "iwcm.h" + +MODULE_AUTHOR("Tom Tucker"); +MODULE_DESCRIPTION("iWARP CM"); +MODULE_LICENSE("Dual BSD/GPL"); + +static struct workqueue_struct *iwcm_wq; +struct iwcm_work { + struct work_struct work; + struct iwcm_id_private *cm_id; + struct list_head list; + struct iw_cm_event event; + struct list_head free_list; +}; + +static unsigned int default_backlog = 256; + +/* + * The following services provide a mechanism for pre-allocating iwcm_work + * elements. The design pre-allocates them based on the cm_id type: + * LISTENING IDS: Get enough elements preallocated to handle the + * listen backlog. + * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE + * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE + * + * Allocating them in connect and listen avoids having to deal + * with allocation failures on the event upcall from the provider (which + * is called in the interrupt context). + * + * One exception is when creating the cm_id for incoming connection requests. + * There are two cases: + * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If + * the backlog is exceeded, then no more connection request events will + * be processed. cm_event_handler() returns -ENOMEM in this case. It's up + * to the provider to reject the connection request. + * 2) in the connection request workqueue handler, cm_conn_req_handler(). + * If work elements cannot be allocated for the new connect request cm_id, + * then IWCM will call the provider reject method. This is ok since + * cm_conn_req_handler() runs in the workqueue thread context.
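The pre-allocation scheme just described boils down to a free-list carved out before any event can arrive, so the upcall path never allocates. A userspace sketch of that pattern with illustrative names (work_item, work_cache):

#include <stdlib.h>

struct work_item {
	struct work_item *next;
	/* ... event payload ... */
};

struct work_cache {
	struct work_item *free;
};

/* carve out 'count' items up front; partial fill is left for the
 * caller to release, mirroring dealloc_work_entries() */
static int cache_fill(struct work_cache *c, int count)
{
	while (count--) {
		struct work_item *w = malloc(sizeof(*w));

		if (!w)
			return -1;
		w->next = c->free;
		c->free = w;
	}
	return 0;
}

/* upcall path: never allocates; NULL means the backlog is exceeded */
static struct work_item *cache_get(struct work_cache *c)
{
	struct work_item *w = c->free;

	if (w)
		c->free = w->next;
	return w;
}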
+ */ + +static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) +{ + struct iwcm_work *work; + + if (list_empty(&cm_id_priv->work_free_list)) + return NULL; + work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, + free_list); + list_del_init(&work->free_list); + return work; +} + +static void put_work(struct iwcm_work *work) +{ + list_add(&work->free_list, &work->cm_id->work_free_list); +} + +static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) +{ + struct list_head *e, *tmp; + + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + kfree(list_entry(e, struct iwcm_work, free_list)); +} + +static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) +{ + struct iwcm_work *work; + + BUG_ON(!list_empty(&cm_id_priv->work_free_list)); + while (count--) { + work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); + if (!work) { + dealloc_work_entries(cm_id_priv); + return -ENOMEM; + } + work->cm_id = cm_id_priv; + INIT_LIST_HEAD(&work->list); + put_work(work); + } + return 0; +} + +/* + * Save private data from incoming connection requests to + * iw_cm_event, so the low level driver doesn't have to. Adjust + * the event ptr to point to the local copy. + */ +static int copy_private_data(struct iw_cm_event *event) +{ + void *p; + + p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); + if (!p) + return -ENOMEM; + event->private_data = p; + return 0; +} + +static void free_cm_id(struct iwcm_id_private *cm_id_priv) +{ + dealloc_work_entries(cm_id_priv); + kfree(cm_id_priv); +} + +/* + * Release a reference on cm_id. If the last reference is being + * released, free the cm_id and return 1. + */ +static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) +{ + BUG_ON(atomic_read(&cm_id_priv->refcount)==0); + if (atomic_dec_and_test(&cm_id_priv->refcount)) { + BUG_ON(!list_empty(&cm_id_priv->work_list)); + free_cm_id(cm_id_priv); + return 1; + } + + return 0; +} + +static void add_ref(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + atomic_inc(&cm_id_priv->refcount); +} + +static void rem_ref(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + (void)iwcm_deref_id(cm_id_priv); +} + +static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); + +struct iw_cm_id *iw_create_cm_id(struct ib_device *device, + iw_cm_handler cm_handler, + void *context) +{ + struct iwcm_id_private *cm_id_priv; + + cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); + if (!cm_id_priv) + return ERR_PTR(-ENOMEM); + + cm_id_priv->state = IW_CM_STATE_IDLE; + cm_id_priv->id.device = device; + cm_id_priv->id.cm_handler = cm_handler; + cm_id_priv->id.context = context; + cm_id_priv->id.event_handler = cm_event_handler; + cm_id_priv->id.add_ref = add_ref; + cm_id_priv->id.rem_ref = rem_ref; + spin_lock_init(&cm_id_priv->lock); + atomic_set(&cm_id_priv->refcount, 1); + init_waitqueue_head(&cm_id_priv->connect_wait); + init_completion(&cm_id_priv->destroy_comp); + INIT_LIST_HEAD(&cm_id_priv->work_list); + INIT_LIST_HEAD(&cm_id_priv->work_free_list); + + return &cm_id_priv->id; +} +EXPORT_SYMBOL(iw_create_cm_id); + + +static int iwcm_modify_qp_err(struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + if (!qp) + return -EINVAL; + + qp_attr.qp_state = IB_QPS_ERR; + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); +} + +/* + * This is really the RDMAC CLOSING state. 
It is most similar to the + * IB SQD QP state. + */ +static int iwcm_modify_qp_sqd(struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + BUG_ON(qp == NULL); + qp_attr.qp_state = IB_QPS_SQD; + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); +} + +/* + * CM_ID <-- CLOSING + * + * Block if a passive or active connection is currently being processed. Then + * process the event as follows: + * - If we are ESTABLISHED, move to CLOSING and modify the QP state + * based on the abrupt flag + * - If the connection is already in the CLOSING or IDLE state, the peer is + * disconnecting concurrently with us and we've already seen the + * DISCONNECT event -- ignore the request and return 0 + * - Disconnect on a listening endpoint returns -EINVAL + */ +int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + struct ib_qp *qp = NULL; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + /* Wait if we're currently in a connect or accept downcall */ + wait_event(cm_id_priv->connect_wait, + !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_ESTABLISHED: + cm_id_priv->state = IW_CM_STATE_CLOSING; + + /* QP could be for user-mode client */ + if (cm_id_priv->qp) + qp = cm_id_priv->qp; + else + ret = -EINVAL; + break; + case IW_CM_STATE_LISTEN: + ret = -EINVAL; + break; + case IW_CM_STATE_CLOSING: + /* remote peer closed first */ + case IW_CM_STATE_IDLE: + /* accept or connect returned !0 */ + break; + case IW_CM_STATE_CONN_RECV: + /* + * App called disconnect before/without calling accept after + * connect_request event delivered. + */ + break; + case IW_CM_STATE_CONN_SENT: + /* Can only get here if wait above fails */ + default: + BUG(); + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (qp) { + if (abrupt) + ret = iwcm_modify_qp_err(qp); + else + ret = iwcm_modify_qp_sqd(qp); + + /* + * If both sides are disconnecting the QP could + * already be in ERR or SQD states + */ + ret = 0; + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_disconnect); + +/* + * CM_ID <-- DESTROYING + * + * Clean up all resources associated with the connection and release + * the initial reference taken by iw_create_cm_id. + */ +static void destroy_cm_id(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + /* + * Wait if we're currently in a connect or accept downcall. A + * listening endpoint should never block here. + */ + wait_event(cm_id_priv->connect_wait, + !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); + + /* + * Since we're deleting the cm_id, drop any events that + * might arrive before the last dereference. 
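destroy_cm_id() leans on the reference pattern visible in iwcm_deref_id(): every queued event holds a reference, the destroyer merely drops the initial one, and whichever thread reaches zero frees the object. A minimal userspace sketch of that pattern with C11 atomics (obj, obj_get, obj_put are illustrative names):

#include <stdatomic.h>

struct obj {
	atomic_int refcount;	/* starts at 1: the creator's reference */
};

static void obj_get(struct obj *o)
{
	atomic_fetch_add(&o->refcount, 1);
}

/* returns 1 when the caller dropped the last reference and may free;
 * no event can still be holding the object at that point */
static int obj_put(struct obj *o)
{
	return atomic_fetch_sub(&o->refcount, 1) == 1;
}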
+ */ + set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_LISTEN: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + /* destroy the listening endpoint */ + cm_id->device->iwcm->destroy_listen(cm_id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_ESTABLISHED: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + /* Abrupt close of the connection */ + (void)iwcm_modify_qp_err(cm_id_priv->qp); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CLOSING: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + break; + case IW_CM_STATE_CONN_RECV: + /* + * App called destroy before/without calling accept after + * receiving connection request event notification or + * returned non zero from the event callback function. + * In either case, must tell the provider to reject. + */ + cm_id_priv->state = IW_CM_STATE_DESTROYING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + cm_id->device->iwcm->reject(cm_id, NULL, 0); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_DESTROYING: + default: + BUG(); + break; + } + if (cm_id_priv->qp) { + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + (void)iwcm_deref_id(cm_id_priv); +} + +/* + * This function is only called by the application thread and cannot + * be called by the event thread. The function will wait for all + * references to be released on the cm_id and then kfree the cm_id + * object. + */ +void iw_destroy_cm_id(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + destroy_cm_id(cm_id); +} +EXPORT_SYMBOL(iw_destroy_cm_id); + +/** + * iw_cm_map - Use portmapper to map the ports + * @cm_id: connection manager pointer + * @active: Indicates the active side when true + * returns nonzero for error only if iwpm_create_mapinfo() fails + * + * Tries to add a mapping for a port using the Portmapper. If + * successful in mapping the IP/Port it will check the remote + * mapped IP address for a wildcard IP address and replace the + * zero IP address with the remote_addr. + */ +static int iw_cm_map(struct iw_cm_id *cm_id, bool active) +{ + cm_id->m_local_addr = cm_id->local_addr; + cm_id->m_remote_addr = cm_id->remote_addr; + + return 0; +} + +/* + * CM_ID <-- LISTEN + * + * Start listening for connect requests. Generates one CONNECT_REQUEST + * event for each inbound connect request. 
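A hypothetical passive-side setup using the listen API above; my_handler is a client-supplied iw_cm_handler and error handling is trimmed for brevity:

static struct iw_cm_id *start_listening(struct ib_device *dev,
					iw_cm_handler my_handler)
{
	struct iw_cm_id *id = iw_create_cm_id(dev, my_handler, NULL);

	if (IS_ERR(id))
		return id;
	if (iw_cm_listen(id, 16)) {	/* 0 would select default_backlog */
		iw_destroy_cm_id(id);
		return ERR_PTR(-EINVAL);
	}
	return id;	/* CONNECT_REQUEST events now flow to my_handler */
}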
+ */ +int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + if (!backlog) + backlog = default_backlog; + + ret = alloc_work_entries(cm_id_priv, backlog); + if (ret) + return ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + cm_id_priv->state = IW_CM_STATE_LISTEN; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = iw_cm_map(cm_id, false); + if (!ret) + ret = cm_id->device->iwcm->create_listen(cm_id, backlog); + if (ret) + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + default: + ret = -EINVAL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + return ret; +} +EXPORT_SYMBOL(iw_cm_listen); + +/* + * CM_ID <-- IDLE + * + * Rejects an inbound connection request. No events are generated. + */ +int iw_cm_reject(struct iw_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return -EINVAL; + } + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->reject(cm_id, private_data, + private_data_len); + + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + + return ret; +} +EXPORT_SYMBOL(iw_cm_reject); + +/* + * CM_ID <-- ESTABLISHED + * + * Accepts an inbound connection request and generates an ESTABLISHED + * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block + * until the ESTABLISHED event is received from the provider. 
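A sketch of the passive-side accept that a client would typically issue from its CONNECT_REQUEST callback; my_qpn and the ord/ird values are illustrative, and the QPN must be resolvable through the provider's get_qp method as the implementation below shows:

static int accept_request(struct iw_cm_id *cm_id, u32 my_qpn)
{
	struct iw_cm_conn_param param = {
		.qpn = my_qpn,
		.ord = 4,
		.ird = 4,
	};

	return iw_cm_accept(cm_id, &param);	/* -EINVAL unless CONN_RECV */
}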
+ */ +int iw_cm_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *iw_param) +{ + struct iwcm_id_private *cm_id_priv; + struct ib_qp *qp; + unsigned long flags; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return -EINVAL; + } + /* Get the ib_qp given the QPN */ + qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + if (!qp) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return -EINVAL; + } + cm_id->device->iwcm->add_ref(qp); + cm_id_priv->qp = qp; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->accept(cm_id, iw_param); + if (ret) { + /* An error on accept precludes provider events */ + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->qp) { + cm_id->device->iwcm->rem_ref(qp); + cm_id_priv->qp = NULL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_accept); + +/* + * Active Side: CM_ID <-- CONN_SENT + * + * If successful, results in the generation of a CONNECT_REPLY + * event. iw_cm_disconnect and iw_cm_destroy will block until the + * CONNECT_REPLY event is received from the provider. + */ +int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + struct iwcm_id_private *cm_id_priv; + int ret; + unsigned long flags; + struct ib_qp *qp; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + ret = alloc_work_entries(cm_id_priv, 4); + if (ret) + return ret; + + set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + spin_lock_irqsave(&cm_id_priv->lock, flags); + + if (cm_id_priv->state != IW_CM_STATE_IDLE) { + ret = -EINVAL; + goto err; + } + + /* Get the ib_qp given the QPN */ + qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + if (!qp) { + ret = -EINVAL; + goto err; + } + cm_id->device->iwcm->add_ref(qp); + cm_id_priv->qp = qp; + cm_id_priv->state = IW_CM_STATE_CONN_SENT; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = iw_cm_map(cm_id, true); + if (!ret) + ret = cm_id->device->iwcm->connect(cm_id, iw_param); + if (!ret) + return 0; /* success */ + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->qp) { + cm_id->device->iwcm->rem_ref(qp); + cm_id_priv->qp = NULL; + } + cm_id_priv->state = IW_CM_STATE_IDLE; +err: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return ret; +} +EXPORT_SYMBOL(iw_cm_connect); + +/* + * Passive Side: new CM_ID <-- CONN_RECV + * + * Handles an inbound connect request. The function creates a new + * iw_cm_id to represent the new connection and inherits the client + * callback function and other attributes from the listening parent. + * + * The work item contains a pointer to the listen_cm_id and the event. The + * listen_cm_id contains the client cm_handler, context and + * device. These are copied when the device is cloned. 
The event + * contains the new four tuple. + * + * An error on the child should not affect the parent, so this + * function does not return a value. + */ +static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + struct iw_cm_id *cm_id; + struct iwcm_id_private *cm_id_priv; + int ret; + + /* + * The provider should never generate a connection request + * event with a bad status. + */ + BUG_ON(iw_event->status); + + cm_id = iw_create_cm_id(listen_id_priv->id.device, + listen_id_priv->id.cm_handler, + listen_id_priv->id.context); + /* If the cm_id could not be created, ignore the request */ + if (IS_ERR(cm_id)) + goto out; + + cm_id->provider_data = iw_event->provider_data; + cm_id->m_local_addr = iw_event->local_addr; + cm_id->m_remote_addr = iw_event->remote_addr; + cm_id->local_addr = listen_id_priv->id.local_addr; + cm_id->remote_addr = iw_event->remote_addr; + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + cm_id_priv->state = IW_CM_STATE_CONN_RECV; + + /* + * We could be destroying the listening id. If so, ignore this + * upcall. + */ + spin_lock_irqsave(&listen_id_priv->lock, flags); + if (listen_id_priv->state != IW_CM_STATE_LISTEN) { + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + iw_cm_reject(cm_id, NULL, 0); + iw_destroy_cm_id(cm_id); + goto out; + } + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + + ret = alloc_work_entries(cm_id_priv, 3); + if (ret) { + iw_cm_reject(cm_id, NULL, 0); + iw_destroy_cm_id(cm_id); + goto out; + } + + /* Call the client CM handler */ + ret = cm_id->cm_handler(cm_id, iw_event); + if (ret) { + iw_cm_reject(cm_id, NULL, 0); + iw_destroy_cm_id(cm_id); + } + +out: + if (iw_event->private_data_len) + kfree(iw_event->private_data); +} + +/* + * Passive Side: CM_ID <-- ESTABLISHED + * + * The provider generated an ESTABLISHED event which means that + * the MPA negotiation has completed successfully and we are now in MPA + * FPDU mode. + * + * This event can only be received in the CONN_RECV state. If the + * remote peer closed, the ESTABLISHED event would be received followed + * by the CLOSE event. If the app closes, it will block until we wake + * it up after processing this event. + */ +static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + + /* + * We clear the CONNECT_WAIT bit here to allow the callback + * function to call iw_cm_disconnect. Calling iw_destroy_cm_id + * from a callback handler is not allowed. + */ + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); + cm_id_priv->state = IW_CM_STATE_ESTABLISHED; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + wake_up_all(&cm_id_priv->connect_wait); + + return ret; +} + +/* + * Active Side: CM_ID <-- ESTABLISHED + * + * The app has called connect and is waiting for the established event to + * post its requests to the server. This event will wake up anyone + * blocked in iw_cm_disconnect or iw_destroy_id.
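The active-side counterpart of the accept sketch earlier is symmetric: a hypothetical initiation of a connection through iw_cm_connect(), again with illustrative qpn/ord/ird values:

static int start_connect(struct iw_cm_id *cm_id, u32 qpn)
{
	struct iw_cm_conn_param param = {
		.qpn = qpn,	/* must resolve via the provider's get_qp */
		.ord = 4,
		.ird = 4,
	};

	/* a CONNECT_REPLY event follows; on failure the ID returns to IDLE */
	return iw_cm_connect(cm_id, &param);
}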
+ */ +static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + /* + * Clear the connect wait bit so a callback function calling + * iw_cm_disconnect will not wait and deadlock this thread + */ + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); + if (iw_event->status == 0) { + cm_id_priv->id.m_local_addr = iw_event->local_addr; + cm_id_priv->id.m_remote_addr = iw_event->remote_addr; + iw_event->local_addr = cm_id_priv->id.local_addr; + iw_event->remote_addr = cm_id_priv->id.remote_addr; + cm_id_priv->state = IW_CM_STATE_ESTABLISHED; + } else { + /* REJECTED or RESET */ + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + cm_id_priv->state = IW_CM_STATE_IDLE; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + + if (iw_event->private_data_len) + kfree(iw_event->private_data); + + /* Wake up waiters on connect complete */ + wake_up_all(&cm_id_priv->connect_wait); + + return ret; +} + +/* + * CM_ID <-- CLOSING + * + * If in the ESTABLISHED state, move to CLOSING. + */ +static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) + cm_id_priv->state = IW_CM_STATE_CLOSING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); +} + +/* + * CM_ID <-- IDLE + * + * If in the ESTABLISHED or CLOSING states, the QP will have been + * moved by the provider to the ERR state. Disassociate the CM_ID from + * the QP, move to IDLE, and remove the 'connected' reference. + * + * If in some other state, the cm_id was destroyed asynchronously. + * This is the last reference that will result in waking up + * the app thread blocked in iw_destroy_cm_id. + */ +static int cm_close_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + int ret = 0; + spin_lock_irqsave(&cm_id_priv->lock, flags); + + if (cm_id_priv->qp) { + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + } + switch (cm_id_priv->state) { + case IW_CM_STATE_ESTABLISHED: + case IW_CM_STATE_CLOSING: + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_DESTROYING: + break; + default: + BUG(); + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + return ret; +} + +static int process_event(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + int ret = 0; + + switch (iw_event->event) { + case IW_CM_EVENT_CONNECT_REQUEST: + cm_conn_req_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_CONNECT_REPLY: + ret = cm_conn_rep_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_ESTABLISHED: + ret = cm_conn_est_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_DISCONNECT: + cm_disconnect_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_CLOSE: + ret = cm_close_handler(cm_id_priv, iw_event); + break; + default: + BUG(); + } + + return ret; +} + +/* + * Process events on the work_list for the cm_id.
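A hypothetical client callback for the events dispatched by process_event() above, reusing the accept_request() sketch from earlier; returning nonzero from the handler makes cm_work_handler() (below) destroy the cm_id:

static int my_cm_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event)
{
	switch (event->event) {
	case IW_CM_EVENT_CONNECT_REQUEST:
		return accept_request(cm_id, 0 /* illustrative QPN */);
	case IW_CM_EVENT_CONNECT_REPLY:
		return event->status;	/* nonzero tears the cm_id down */
	case IW_CM_EVENT_ESTABLISHED:
	case IW_CM_EVENT_DISCONNECT:
	case IW_CM_EVENT_CLOSE:
		return 0;
	default:
		return -EINVAL;
	}
}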
If the callback + * function requests that the cm_id be deleted, a flag is set in the + * cm_id flags to indicate that when the last reference is + * removed, the cm_id is to be destroyed. This is necessary to + * distinguish between an object that will be destroyed by the app + * thread asleep on the destroy_comp list vs. an object destroyed + * here synchronously when the last reference is removed. + */ +static void cm_work_handler(struct work_struct *_work) +{ + struct iwcm_work *work = container_of(_work, struct iwcm_work, work); + struct iw_cm_event levent; + struct iwcm_id_private *cm_id_priv = work->cm_id; + unsigned long flags; + int empty; + int ret = 0; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + empty = list_empty(&cm_id_priv->work_list); + while (!empty) { + work = list_entry(cm_id_priv->work_list.next, + struct iwcm_work, list); + list_del_init(&work->list); + empty = list_empty(&cm_id_priv->work_list); + levent = work->event; + put_work(work); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { + ret = process_event(cm_id_priv, &levent); + if (ret) + destroy_cm_id(&cm_id_priv->id); + } else + pr_debug("dropping event %d\n", levent.event); + if (iwcm_deref_id(cm_id_priv)) + return; + if (empty) + return; + spin_lock_irqsave(&cm_id_priv->lock, flags); + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); +} + +/* + * This function is called on interrupt context. Schedule events on + * the iwcm_wq thread to allow callback functions to downcall into + * the CM and/or block. Events are queued to a per-CM_ID + * work_list. If this is the first event on the work_list, the work + * element is also queued on the iwcm_wq thread. + * + * Each event holds a reference on the cm_id. Until the last posted + * event has been delivered and processed, the cm_id cannot be + * deleted. + * + * Returns: + * 0 - the event was handled. + * -ENOMEM - the event was not handled due to lack of resources. 
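The queueing rule described above (append to a per-ID list, schedule the worker only when the list was empty, so one worker pass drains everything) is a common pattern. A userspace sketch with a pthread mutex, where schedule_worker() is a stub standing in for queue_work():

#include <pthread.h>
#include <stddef.h>

struct event {
	struct event *next;
	/* ... payload ... */
};

struct event_queue {
	pthread_mutex_t lock;
	struct event *head, *tail;
};

/* stub: in the driver this queues cm_work_handler() on iwcm_wq */
static void schedule_worker(struct event_queue *q) { (void)q; }

static void post_event(struct event_queue *q, struct event *e)
{
	int was_empty;

	pthread_mutex_lock(&q->lock);
	was_empty = (q->head == NULL);
	e->next = NULL;
	if (q->tail)
		q->tail->next = e;
	else
		q->head = e;
	q->tail = e;
	pthread_mutex_unlock(&q->lock);

	/* only the first event schedules the worker; it drains the list */
	if (was_empty)
		schedule_worker(q);
}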
+ */ +static int cm_event_handler(struct iw_cm_id *cm_id, + struct iw_cm_event *iw_event) +{ + struct iwcm_work *work; + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + work = get_work(cm_id_priv); + if (!work) { + ret = -ENOMEM; + goto out; + } + + INIT_WORK(&work->work, cm_work_handler); + work->cm_id = cm_id_priv; + work->event = *iw_event; + + if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || + work->event.event == IW_CM_EVENT_CONNECT_REPLY) && + work->event.private_data_len) { + ret = copy_private_data(&work->event); + if (ret) { + put_work(work); + goto out; + } + } + + atomic_inc(&cm_id_priv->refcount); + if (list_empty(&cm_id_priv->work_list)) { + list_add_tail(&work->list, &cm_id_priv->work_list); + queue_work(iwcm_wq, &work->work); + } else + list_add_tail(&work->list, &cm_id_priv->work_list); +out: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_CONN_RECV: + case IW_CM_STATE_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; + qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| + IB_ACCESS_REMOTE_READ; + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_CONN_RECV: + case IW_CM_STATE_ESTABLISHED: + *qp_attr_mask = 0; + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + struct iwcm_id_private *cm_id_priv; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + switch (qp_attr->qp_state) { + case IB_QPS_INIT: + case IB_QPS_RTR: + ret = iwcm_init_qp_init_attr(cm_id_priv, + qp_attr, qp_attr_mask); + break; + case IB_QPS_RTS: + ret = iwcm_init_qp_rts_attr(cm_id_priv, + qp_attr, qp_attr_mask); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} +EXPORT_SYMBOL(iw_cm_init_qp_attr); + +static int __init iw_cm_init(void) +{ + iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); + if (!iwcm_wq) + return -ENOMEM; + + return 0; +} + +static void __exit iw_cm_cleanup(void) +{ + destroy_workqueue(iwcm_wq); +} + +module_init(iw_cm_init); +module_exit(iw_cm_cleanup); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_iwcm.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_mad.c =================================================================== --- 
projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_mad.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_mad.c (revision 320592) @@ -0,0 +1,3339 @@ +/* + * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#define LINUXKPI_PARAM_PREFIX ibcore_ +#define KBUILD_MODNAME "ibcore" + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "mad_priv.h" +#include "mad_rmpp.h" +#include "smi.h" +#include "opa_smi.h" +#include "agent.h" +#include "core_priv.h" + +static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; +static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; + +module_param_named(send_queue_size, mad_sendq_size, int, 0444); +MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests"); +module_param_named(recv_queue_size, mad_recvq_size, int, 0444); +MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); + +static struct list_head ib_mad_port_list; +static u32 ib_mad_client_id = 0; + +/* Port list lock */ +static DEFINE_SPINLOCK(ib_mad_port_list_lock); + +/* Forward declarations */ +static int method_in_use(struct ib_mad_mgmt_method_table **method, + struct ib_mad_reg_req *mad_reg_req); +static void remove_mad_reg_req(struct ib_mad_agent_private *priv); +static struct ib_mad_agent_private *find_mad_agent( + struct ib_mad_port_private *port_priv, + const struct ib_mad_hdr *mad); +static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, + struct ib_mad_private *mad); +static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); +static void timeout_sends(struct work_struct *work); +static void local_completions(struct work_struct *work); +static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, + struct ib_mad_agent_private *agent_priv, + u8 mgmt_class); +static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, + struct ib_mad_agent_private *agent_priv); +static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, + struct ib_wc *wc); +static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc); + +/* + * Returns a ib_mad_port_private structure or NULL for a device/port + * Assumes ib_mad_port_list_lock is being held + */ +static inline struct ib_mad_port_private * +__ib_get_mad_port(struct ib_device *device, int port_num) +{ + struct ib_mad_port_private *entry; + + list_for_each_entry(entry, &ib_mad_port_list, port_list) { + if (entry->device == device && entry->port_num == port_num) + return entry; + } + return NULL; +} + +/* + * Wrapper function to return a ib_mad_port_private structure or NULL + * for a device/port + */ +static inline struct ib_mad_port_private * +ib_get_mad_port(struct ib_device *device, int port_num) +{ + struct ib_mad_port_private *entry; + unsigned long flags; + + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + entry = __ib_get_mad_port(device, port_num); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + + return entry; +} + +static inline u8 convert_mgmt_class(u8 mgmt_class) +{ + /* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */ + return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ? 
+ 0 : mgmt_class; +} + +static int get_spl_qp_index(enum ib_qp_type qp_type) +{ + switch (qp_type) + { + case IB_QPT_SMI: + return 0; + case IB_QPT_GSI: + return 1; + default: + return -1; + } +} + +static int vendor_class_index(u8 mgmt_class) +{ + return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START; +} + +static int is_vendor_class(u8 mgmt_class) +{ + if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) || + (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END)) + return 0; + return 1; +} + +static int is_vendor_oui(char *oui) +{ + if (oui[0] || oui[1] || oui[2]) + return 1; + return 0; +} + +static int is_vendor_method_in_use( + struct ib_mad_mgmt_vendor_class *vendor_class, + struct ib_mad_reg_req *mad_reg_req) +{ + struct ib_mad_mgmt_method_table *method; + int i; + + for (i = 0; i < MAX_MGMT_OUI; i++) { + if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) { + method = vendor_class->method_table[i]; + if (method) { + if (method_in_use(&method, mad_reg_req)) + return 1; + else + break; + } + } + } + return 0; +} + +int ib_response_mad(const struct ib_mad_hdr *hdr) +{ + return ((hdr->method & IB_MGMT_METHOD_RESP) || + (hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) || + ((hdr->mgmt_class == IB_MGMT_CLASS_BM) && + (hdr->attr_mod & IB_BM_ATTR_MOD_RESP))); +} +EXPORT_SYMBOL(ib_response_mad); + +/* + * ib_register_mad_agent - Register to send/receive MADs + */ +struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, + u8 port_num, + enum ib_qp_type qp_type, + struct ib_mad_reg_req *mad_reg_req, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context, + u32 registration_flags) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_agent *ret = ERR_PTR(-EINVAL); + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_reg_req *reg_req = NULL; + struct ib_mad_mgmt_class_table *class; + struct ib_mad_mgmt_vendor_class_table *vendor; + struct ib_mad_mgmt_vendor_class *vendor_class; + struct ib_mad_mgmt_method_table *method; + int ret2, qpn; + unsigned long flags; + u8 mgmt_class, vclass; + + /* Validate parameters */ + qpn = get_spl_qp_index(qp_type); + if (qpn == -1) { + dev_notice(&device->dev, + "ib_register_mad_agent: invalid QP Type %d\n", + qp_type); + goto error1; + } + + if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) { + dev_notice(&device->dev, + "ib_register_mad_agent: invalid RMPP Version %u\n", + rmpp_version); + goto error1; + } + + /* Validate MAD registration request if supplied */ + if (mad_reg_req) { + if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) { + dev_notice(&device->dev, + "ib_register_mad_agent: invalid Class Version %u\n", + mad_reg_req->mgmt_class_version); + goto error1; + } + if (!recv_handler) { + dev_notice(&device->dev, + "ib_register_mad_agent: no recv_handler\n"); + goto error1; + } + if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { + /* + * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only + * one in this range currently allowed + */ + if (mad_reg_req->mgmt_class != + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + dev_notice(&device->dev, + "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n", + mad_reg_req->mgmt_class); + goto error1; + } + } else if (mad_reg_req->mgmt_class == 0) { + /* + * Class 0 is reserved in IBA and is used for + * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE + */ + dev_notice(&device->dev, + "ib_register_mad_agent: Invalid Mgmt Class 0\n"); + goto error1; + } else if (is_vendor_class(mad_reg_req->mgmt_class)) { + /* + * If class is in "new" vendor range, + * 
ensure supplied OUI is not zero + */ + if (!is_vendor_oui(mad_reg_req->oui)) { + dev_notice(&device->dev, + "ib_register_mad_agent: No OUI specified for class 0x%x\n", + mad_reg_req->mgmt_class); + goto error1; + } + } + /* Make sure class supplied is consistent with RMPP */ + if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { + if (rmpp_version) { + dev_notice(&device->dev, + "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n", + mad_reg_req->mgmt_class); + goto error1; + } + } + + /* Make sure class supplied is consistent with QP type */ + if (qp_type == IB_QPT_SMI) { + if ((mad_reg_req->mgmt_class != + IB_MGMT_CLASS_SUBN_LID_ROUTED) && + (mad_reg_req->mgmt_class != + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + dev_notice(&device->dev, + "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n", + mad_reg_req->mgmt_class); + goto error1; + } + } else { + if ((mad_reg_req->mgmt_class == + IB_MGMT_CLASS_SUBN_LID_ROUTED) || + (mad_reg_req->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + dev_notice(&device->dev, + "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n", + mad_reg_req->mgmt_class); + goto error1; + } + } + } else { + /* No registration request supplied */ + if (!send_handler) + goto error1; + if (registration_flags & IB_MAD_USER_RMPP) + goto error1; + } + + /* Validate device and port */ + port_priv = ib_get_mad_port(device, port_num); + if (!port_priv) { + dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n"); + ret = ERR_PTR(-ENODEV); + goto error1; + } + + /* Verify the QP requested is supported. For example, Ethernet devices + * will not have QP0 */ + if (!port_priv->qp_info[qpn].qp) { + dev_notice(&device->dev, + "ib_register_mad_agent: QP %d not supported\n", qpn); + ret = ERR_PTR(-EPROTONOSUPPORT); + goto error1; + } + + /* Allocate structures */ + mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL); + if (!mad_agent_priv) { + ret = ERR_PTR(-ENOMEM); + goto error1; + } + + if (mad_reg_req) { + reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL); + if (!reg_req) { + ret = ERR_PTR(-ENOMEM); + goto error3; + } + } + + /* Now, fill in the various structures */ + mad_agent_priv->qp_info = &port_priv->qp_info[qpn]; + mad_agent_priv->reg_req = reg_req; + mad_agent_priv->agent.rmpp_version = rmpp_version; + mad_agent_priv->agent.device = device; + mad_agent_priv->agent.recv_handler = recv_handler; + mad_agent_priv->agent.send_handler = send_handler; + mad_agent_priv->agent.context = context; + mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp; + mad_agent_priv->agent.port_num = port_num; + mad_agent_priv->agent.flags = registration_flags; + spin_lock_init(&mad_agent_priv->lock); + INIT_LIST_HEAD(&mad_agent_priv->send_list); + INIT_LIST_HEAD(&mad_agent_priv->wait_list); + INIT_LIST_HEAD(&mad_agent_priv->done_list); + INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); + INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); + INIT_LIST_HEAD(&mad_agent_priv->local_list); + INIT_WORK(&mad_agent_priv->local_work, local_completions); + atomic_set(&mad_agent_priv->refcount, 1); + init_completion(&mad_agent_priv->comp); + + spin_lock_irqsave(&port_priv->reg_lock, flags); + mad_agent_priv->agent.hi_tid = ++ib_mad_client_id; + + /* + * Make sure MAD registration (if supplied) + * is non overlapping with any existing ones + */ + if (mad_reg_req) { + mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class); + if (!is_vendor_class(mgmt_class)) { + class = port_priv->version[mad_reg_req-> + mgmt_class_version].class; + if (class) { + 
method = class->method_table[mgmt_class]; + if (method) { + if (method_in_use(&method, + mad_reg_req)) + goto error4; + } + } + ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, + mgmt_class); + } else { + /* "New" vendor class range */ + vendor = port_priv->version[mad_reg_req-> + mgmt_class_version].vendor; + if (vendor) { + vclass = vendor_class_index(mgmt_class); + vendor_class = vendor->vendor_class[vclass]; + if (vendor_class) { + if (is_vendor_method_in_use( + vendor_class, + mad_reg_req)) + goto error4; + } + } + ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); + } + if (ret2) { + ret = ERR_PTR(ret2); + goto error4; + } + } + + /* Add mad agent into port's agent list */ + list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list); + spin_unlock_irqrestore(&port_priv->reg_lock, flags); + + return &mad_agent_priv->agent; + +error4: + spin_unlock_irqrestore(&port_priv->reg_lock, flags); + kfree(reg_req); +error3: + kfree(mad_agent_priv); +error1: + return ret; +} +EXPORT_SYMBOL(ib_register_mad_agent); + +static inline int is_snooping_sends(int mad_snoop_flags) +{ + return (mad_snoop_flags & + (/*IB_MAD_SNOOP_POSTED_SENDS | + IB_MAD_SNOOP_RMPP_SENDS |*/ + IB_MAD_SNOOP_SEND_COMPLETIONS /*| + IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/)); +} + +static inline int is_snooping_recvs(int mad_snoop_flags) +{ + return (mad_snoop_flags & + (IB_MAD_SNOOP_RECVS /*| + IB_MAD_SNOOP_RMPP_RECVS*/)); +} + +static int register_snoop_agent(struct ib_mad_qp_info *qp_info, + struct ib_mad_snoop_private *mad_snoop_priv) +{ + struct ib_mad_snoop_private **new_snoop_table; + unsigned long flags; + int i; + + spin_lock_irqsave(&qp_info->snoop_lock, flags); + /* Check for empty slot in array. */ + for (i = 0; i < qp_info->snoop_table_size; i++) + if (!qp_info->snoop_table[i]) + break; + + if (i == qp_info->snoop_table_size) { + /* Grow table. 
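register_snoop_agent() grows the snoop table in place and must use GFP_ATOMIC because the snoop spinlock is held across the krealloc(). A userspace analogue where plain realloc() stands in (illustrative only; no locking shown):

#include <stdlib.h>

static int table_append(void ***tbl, int *size, void *entry)
{
	void **grown = realloc(*tbl, sizeof(*grown) * (*size + 1));

	if (!grown)
		return -1;	/* old table stays valid, as with krealloc */
	grown[*size] = entry;
	*tbl = grown;
	(*size)++;
	return 0;
}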
*/ + new_snoop_table = krealloc(qp_info->snoop_table, + sizeof mad_snoop_priv * + (qp_info->snoop_table_size + 1), + GFP_ATOMIC); + if (!new_snoop_table) { + i = -ENOMEM; + goto out; + } + + qp_info->snoop_table = new_snoop_table; + qp_info->snoop_table_size++; + } + qp_info->snoop_table[i] = mad_snoop_priv; + atomic_inc(&qp_info->snoop_count); +out: + spin_unlock_irqrestore(&qp_info->snoop_lock, flags); + return i; +} + +struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, + u8 port_num, + enum ib_qp_type qp_type, + int mad_snoop_flags, + ib_mad_snoop_handler snoop_handler, + ib_mad_recv_handler recv_handler, + void *context) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_agent *ret; + struct ib_mad_snoop_private *mad_snoop_priv; + int qpn; + + /* Validate parameters */ + if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) || + (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) { + ret = ERR_PTR(-EINVAL); + goto error1; + } + qpn = get_spl_qp_index(qp_type); + if (qpn == -1) { + ret = ERR_PTR(-EINVAL); + goto error1; + } + port_priv = ib_get_mad_port(device, port_num); + if (!port_priv) { + ret = ERR_PTR(-ENODEV); + goto error1; + } + /* Allocate structures */ + mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL); + if (!mad_snoop_priv) { + ret = ERR_PTR(-ENOMEM); + goto error1; + } + + /* Now, fill in the various structures */ + mad_snoop_priv->qp_info = &port_priv->qp_info[qpn]; + mad_snoop_priv->agent.device = device; + mad_snoop_priv->agent.recv_handler = recv_handler; + mad_snoop_priv->agent.snoop_handler = snoop_handler; + mad_snoop_priv->agent.context = context; + mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp; + mad_snoop_priv->agent.port_num = port_num; + mad_snoop_priv->mad_snoop_flags = mad_snoop_flags; + init_completion(&mad_snoop_priv->comp); + mad_snoop_priv->snoop_index = register_snoop_agent( + &port_priv->qp_info[qpn], + mad_snoop_priv); + if (mad_snoop_priv->snoop_index < 0) { + ret = ERR_PTR(mad_snoop_priv->snoop_index); + goto error2; + } + + atomic_set(&mad_snoop_priv->refcount, 1); + return &mad_snoop_priv->agent; + +error2: + kfree(mad_snoop_priv); +error1: + return ret; +} +EXPORT_SYMBOL(ib_register_mad_snoop); + +static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) +{ + if (atomic_dec_and_test(&mad_agent_priv->refcount)) + complete(&mad_agent_priv->comp); +} + +static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv) +{ + if (atomic_dec_and_test(&mad_snoop_priv->refcount)) + complete(&mad_snoop_priv->comp); +} + +static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) +{ + struct ib_mad_port_private *port_priv; + unsigned long flags; + + /* Note that we could still be handling received MADs */ + + /* + * Canceling all sends results in dropping received response + * MADs, preventing us from queuing additional work + */ + cancel_mads(mad_agent_priv); + port_priv = mad_agent_priv->qp_info->port_priv; + cancel_delayed_work(&mad_agent_priv->timed_work); + + spin_lock_irqsave(&port_priv->reg_lock, flags); + remove_mad_reg_req(mad_agent_priv); + list_del(&mad_agent_priv->agent_list); + spin_unlock_irqrestore(&port_priv->reg_lock, flags); + + flush_workqueue(port_priv->wq); + ib_cancel_rmpp_recvs(mad_agent_priv); + + deref_mad_agent(mad_agent_priv); + wait_for_completion(&mad_agent_priv->comp); + + kfree(mad_agent_priv->reg_req); + kfree(mad_agent_priv); +} + +static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) +{ + 
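+ /*
+ * Clear this agent's slot in the snoop table under snoop_lock,
+ * then wait for any in-flight snoop callbacks (each holds a
+ * reference) to drop before freeing the structure.
+ */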
struct ib_mad_qp_info *qp_info; + unsigned long flags; + + qp_info = mad_snoop_priv->qp_info; + spin_lock_irqsave(&qp_info->snoop_lock, flags); + qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL; + atomic_dec(&qp_info->snoop_count); + spin_unlock_irqrestore(&qp_info->snoop_lock, flags); + + deref_snoop_agent(mad_snoop_priv); + wait_for_completion(&mad_snoop_priv->comp); + + kfree(mad_snoop_priv); +} + +/* + * ib_unregister_mad_agent - Unregisters a client from using MAD services + */ +int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_snoop_private *mad_snoop_priv; + + /* If the TID is zero, the agent can only snoop. */ + if (mad_agent->hi_tid) { + mad_agent_priv = container_of(mad_agent, + struct ib_mad_agent_private, + agent); + unregister_mad_agent(mad_agent_priv); + } else { + mad_snoop_priv = container_of(mad_agent, + struct ib_mad_snoop_private, + agent); + unregister_mad_snoop(mad_snoop_priv); + } + return 0; +} +EXPORT_SYMBOL(ib_unregister_mad_agent); + +static void dequeue_mad(struct ib_mad_list_head *mad_list) +{ + struct ib_mad_queue *mad_queue; + unsigned long flags; + + BUG_ON(!mad_list->mad_queue); + mad_queue = mad_list->mad_queue; + spin_lock_irqsave(&mad_queue->lock, flags); + list_del(&mad_list->list); + mad_queue->count--; + spin_unlock_irqrestore(&mad_queue->lock, flags); +} + +static void snoop_send(struct ib_mad_qp_info *qp_info, + struct ib_mad_send_buf *send_buf, + struct ib_mad_send_wc *mad_send_wc, + int mad_snoop_flags) +{ + struct ib_mad_snoop_private *mad_snoop_priv; + unsigned long flags; + int i; + + spin_lock_irqsave(&qp_info->snoop_lock, flags); + for (i = 0; i < qp_info->snoop_table_size; i++) { + mad_snoop_priv = qp_info->snoop_table[i]; + if (!mad_snoop_priv || + !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) + continue; + + atomic_inc(&mad_snoop_priv->refcount); + spin_unlock_irqrestore(&qp_info->snoop_lock, flags); + mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, + send_buf, mad_send_wc); + deref_snoop_agent(mad_snoop_priv); + spin_lock_irqsave(&qp_info->snoop_lock, flags); + } + spin_unlock_irqrestore(&qp_info->snoop_lock, flags); +} + +static void snoop_recv(struct ib_mad_qp_info *qp_info, + struct ib_mad_recv_wc *mad_recv_wc, + int mad_snoop_flags) +{ + struct ib_mad_snoop_private *mad_snoop_priv; + unsigned long flags; + int i; + + spin_lock_irqsave(&qp_info->snoop_lock, flags); + for (i = 0; i < qp_info->snoop_table_size; i++) { + mad_snoop_priv = qp_info->snoop_table[i]; + if (!mad_snoop_priv || + !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) + continue; + + atomic_inc(&mad_snoop_priv->refcount); + spin_unlock_irqrestore(&qp_info->snoop_lock, flags); + mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL, + mad_recv_wc); + deref_snoop_agent(mad_snoop_priv); + spin_lock_irqsave(&qp_info->snoop_lock, flags); + } + spin_unlock_irqrestore(&qp_info->snoop_lock, flags); +} + +static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid, + u16 pkey_index, u8 port_num, struct ib_wc *wc) +{ + memset(wc, 0, sizeof *wc); + wc->wr_cqe = cqe; + wc->status = IB_WC_SUCCESS; + wc->opcode = IB_WC_RECV; + wc->pkey_index = pkey_index; + wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh); + wc->src_qp = IB_QP0; + wc->qp = qp; + wc->slid = slid; + wc->sl = 0; + wc->dlid_path_bits = 0; + wc->port_num = port_num; +} + +static size_t mad_priv_size(const struct ib_mad_private *mp) +{ + return sizeof(struct ib_mad_private) + 
mp->mad_size; +} + +static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags) +{ + size_t size = sizeof(struct ib_mad_private) + mad_size; + struct ib_mad_private *ret = kzalloc(size, flags); + + if (ret) + ret->mad_size = mad_size; + + return ret; +} + +static size_t port_mad_size(const struct ib_mad_port_private *port_priv) +{ + return rdma_max_mad_size(port_priv->device, port_priv->port_num); +} + +static size_t mad_priv_dma_size(const struct ib_mad_private *mp) +{ + return sizeof(struct ib_grh) + mp->mad_size; +} + +/* + * Return 0 if SMP is to be sent + * Return 1 if SMP was consumed locally (whether or not solicited) + * Return < 0 if error + */ +static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_wr_private *mad_send_wr) +{ + int ret = 0; + struct ib_smp *smp = mad_send_wr->send_buf.mad; + struct opa_smp *opa_smp = (struct opa_smp *)smp; + unsigned long flags; + struct ib_mad_local_private *local; + struct ib_mad_private *mad_priv; + struct ib_mad_port_private *port_priv; + struct ib_mad_agent_private *recv_mad_agent = NULL; + struct ib_device *device = mad_agent_priv->agent.device; + u8 port_num; + struct ib_wc mad_wc; + struct ib_ud_wr *send_wr = &mad_send_wr->send_wr; + size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); + u16 out_mad_pkey_index = 0; + u16 drslid; + bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, + mad_agent_priv->qp_info->port_priv->port_num); + + if (rdma_cap_ib_switch(device) && + smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + port_num = send_wr->port_num; + else + port_num = mad_agent_priv->agent.port_num; + + /* + * Directed route handling starts if the initial LID routed part of + * a request or the ending LID routed part of a response is empty. + * If we are at the start of the LID routed part, don't update the + * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec. + */ + if (opa && smp->class_version == OPA_SMP_CLASS_VERSION) { + u32 opa_drslid; + + if ((opa_get_smp_direction(opa_smp) + ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == + OPA_LID_PERMISSIVE && + opa_smi_handle_dr_smp_send(opa_smp, + rdma_cap_ib_switch(device), + port_num) == IB_SMI_DISCARD) { + ret = -EINVAL; + dev_err(&device->dev, "OPA Invalid directed route\n"); + goto out; + } + opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid); + if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) && + opa_drslid & 0xffff0000) { + ret = -EINVAL; + dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n", + opa_drslid); + goto out; + } + drslid = (u16)(opa_drslid & 0x0000ffff); + + /* Check to post send on QP or process locally */ + if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD && + opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD) + goto out; + } else { + if ((ib_get_smp_direction(smp) ? 
smp->dr_dlid : smp->dr_slid) == + IB_LID_PERMISSIVE && + smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) == + IB_SMI_DISCARD) { + ret = -EINVAL; + dev_err(&device->dev, "Invalid directed route\n"); + goto out; + } + drslid = be16_to_cpu(smp->dr_slid); + + /* Check to post send on QP or process locally */ + if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && + smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) + goto out; + } + + local = kmalloc(sizeof *local, GFP_ATOMIC); + if (!local) { + ret = -ENOMEM; + dev_err(&device->dev, "No memory for ib_mad_local_private\n"); + goto out; + } + local->mad_priv = NULL; + local->recv_mad_agent = NULL; + mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC); + if (!mad_priv) { + ret = -ENOMEM; + dev_err(&device->dev, "No memory for local response MAD\n"); + kfree(local); + goto out; + } + + build_smp_wc(mad_agent_priv->agent.qp, + send_wr->wr.wr_cqe, drslid, + send_wr->pkey_index, + send_wr->port_num, &mad_wc); + + if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) { + mad_wc.byte_len = mad_send_wr->send_buf.hdr_len + + mad_send_wr->send_buf.data_len + + sizeof(struct ib_grh); + } + + /* No GRH for DR SMP */ + ret = device->process_mad(device, 0, port_num, &mad_wc, NULL, + (const struct ib_mad_hdr *)smp, mad_size, + (struct ib_mad_hdr *)mad_priv->mad, + &mad_size, &out_mad_pkey_index); + switch (ret) + { + case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: + if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) && + mad_agent_priv->agent.recv_handler) { + local->mad_priv = mad_priv; + local->recv_mad_agent = mad_agent_priv; + /* + * Reference MAD agent until receive + * side of local completion handled + */ + atomic_inc(&mad_agent_priv->refcount); + } else + kfree(mad_priv); + break; + case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED: + kfree(mad_priv); + break; + case IB_MAD_RESULT_SUCCESS: + /* Treat like an incoming receive MAD */ + port_priv = ib_get_mad_port(mad_agent_priv->agent.device, + mad_agent_priv->agent.port_num); + if (port_priv) { + memcpy(mad_priv->mad, smp, mad_priv->mad_size); + recv_mad_agent = find_mad_agent(port_priv, + (const struct ib_mad_hdr *)mad_priv->mad); + } + if (!port_priv || !recv_mad_agent) { + /* + * No receiving agent so drop packet and + * generate send completion. + */ + kfree(mad_priv); + break; + } + local->mad_priv = mad_priv; + local->recv_mad_agent = recv_mad_agent; + break; + default: + kfree(mad_priv); + kfree(local); + ret = -EINVAL; + goto out; + } + + local->mad_send_wr = mad_send_wr; + if (opa) { + local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index; + local->return_wc_byte_len = mad_size; + } + /* Reference MAD agent until send side of local completion handled */ + atomic_inc(&mad_agent_priv->refcount); + /* Queue local completion to local list */ + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_add_tail(&local->completion_list, &mad_agent_priv->local_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + queue_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->local_work); + ret = 1; +out: + return ret; +} + +static int get_pad_size(int hdr_len, int data_len, size_t mad_size) +{ + int seg_size, pad; + + seg_size = mad_size - hdr_len; + if (data_len && seg_size) { + pad = seg_size - data_len % seg_size; + return pad == seg_size ? 
0 : pad; + } else + return seg_size; +} + +static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_segment *s, *t; + + list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) { + list_del(&s->list); + kfree(s); + } +} + +static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, + size_t mad_size, gfp_t gfp_mask) +{ + struct ib_mad_send_buf *send_buf = &send_wr->send_buf; + struct ib_rmpp_mad *rmpp_mad = send_buf->mad; + struct ib_rmpp_segment *seg = NULL; + int left, seg_size, pad; + + send_buf->seg_size = mad_size - send_buf->hdr_len; + send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR; + seg_size = send_buf->seg_size; + pad = send_wr->pad; + + /* Allocate data segments. */ + for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { + seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask); + if (!seg) { + dev_err(&send_buf->mad_agent->device->dev, + "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n", + sizeof (*seg) + seg_size, gfp_mask); + free_send_rmpp_list(send_wr); + return -ENOMEM; + } + seg->num = ++send_buf->seg_count; + list_add_tail(&seg->list, &send_wr->rmpp_list); + } + + /* Zero any padding */ + if (pad) + memset(seg->data + seg_size - pad, 0, pad); + + rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv-> + agent.rmpp_version; + rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + + send_wr->cur_seg = container_of(send_wr->rmpp_list.next, + struct ib_rmpp_segment, list); + send_wr->last_ack_seg = send_wr->cur_seg; + return 0; +} + +int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent) +{ + return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP); +} +EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent); + +struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + int rmpp_active, + int hdr_len, int data_len, + gfp_t gfp_mask, + u8 base_version) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; + int pad, message_size, ret, size; + void *buf; + size_t mad_size; + bool opa; + + mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, + agent); + + opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num); + + if (opa && base_version == OPA_MGMT_BASE_VERSION) + mad_size = sizeof(struct opa_mad); + else + mad_size = sizeof(struct ib_mad); + + pad = get_pad_size(hdr_len, data_len, mad_size); + message_size = hdr_len + data_len + pad; + + if (ib_mad_kernel_rmpp_agent(mad_agent)) { + if (!rmpp_active && message_size > mad_size) + return ERR_PTR(-EINVAL); + } else + if (rmpp_active || message_size > mad_size) + return ERR_PTR(-EINVAL); + + size = rmpp_active ? 
hdr_len : mad_size; + buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); + if (!buf) + return ERR_PTR(-ENOMEM); + + mad_send_wr = (struct ib_mad_send_wr_private *)((char *)buf + size); + INIT_LIST_HEAD(&mad_send_wr->rmpp_list); + mad_send_wr->send_buf.mad = buf; + mad_send_wr->send_buf.hdr_len = hdr_len; + mad_send_wr->send_buf.data_len = data_len; + mad_send_wr->pad = pad; + + mad_send_wr->mad_agent_priv = mad_agent_priv; + mad_send_wr->sg_list[0].length = hdr_len; + mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey; + + /* OPA MADs don't have to be the full 2048 bytes */ + if (opa && base_version == OPA_MGMT_BASE_VERSION && + data_len < mad_size - hdr_len) + mad_send_wr->sg_list[1].length = data_len; + else + mad_send_wr->sg_list[1].length = mad_size - hdr_len; + + mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey; + + mad_send_wr->mad_list.cqe.done = ib_mad_send_done; + + mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe; + mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list; + mad_send_wr->send_wr.wr.num_sge = 2; + mad_send_wr->send_wr.wr.opcode = IB_WR_SEND; + mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED; + mad_send_wr->send_wr.remote_qpn = remote_qpn; + mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY; + mad_send_wr->send_wr.pkey_index = pkey_index; + + if (rmpp_active) { + ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); + if (ret) { + kfree(buf); + return ERR_PTR(ret); + } + } + + mad_send_wr->send_buf.mad_agent = mad_agent; + atomic_inc(&mad_agent_priv->refcount); + return &mad_send_wr->send_buf; +} +EXPORT_SYMBOL(ib_create_send_mad); + +int ib_get_mad_data_offset(u8 mgmt_class) +{ + if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) + return IB_MGMT_SA_HDR; + else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || + (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || + (mgmt_class == IB_MGMT_CLASS_BIS)) + return IB_MGMT_DEVICE_HDR; + else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) + return IB_MGMT_VENDOR_HDR; + else + return IB_MGMT_MAD_HDR; +} +EXPORT_SYMBOL(ib_get_mad_data_offset); + +int ib_is_mad_class_rmpp(u8 mgmt_class) +{ + if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) || + (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || + (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || + (mgmt_class == IB_MGMT_CLASS_BIS) || + ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))) + return 1; + return 0; +} +EXPORT_SYMBOL(ib_is_mad_class_rmpp); + +void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct list_head *list; + + mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, + send_buf); + list = &mad_send_wr->cur_seg->list; + + if (mad_send_wr->cur_seg->num < seg_num) { + list_for_each_entry(mad_send_wr->cur_seg, list, list) + if (mad_send_wr->cur_seg->num == seg_num) + break; + } else if (mad_send_wr->cur_seg->num > seg_num) { + list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list) + if (mad_send_wr->cur_seg->num == seg_num) + break; + } + return mad_send_wr->cur_seg->data; +} +EXPORT_SYMBOL(ib_get_rmpp_segment); + +static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr) +{ + if (mad_send_wr->send_buf.seg_count) + return ib_get_rmpp_segment(&mad_send_wr->send_buf, + mad_send_wr->seg_num); + else + return (char *)mad_send_wr->send_buf.mad + + mad_send_wr->send_buf.hdr_len; +} + +void ib_free_send_mad(struct 
ib_mad_send_buf *send_buf) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; + + mad_agent_priv = container_of(send_buf->mad_agent, + struct ib_mad_agent_private, agent); + mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, + send_buf); + + free_send_rmpp_list(mad_send_wr); + kfree(send_buf->mad); + deref_mad_agent(mad_agent_priv); +} +EXPORT_SYMBOL(ib_free_send_mad); + +int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_mad_qp_info *qp_info; + struct list_head *list; + struct ib_send_wr *bad_send_wr; + struct ib_mad_agent *mad_agent; + struct ib_sge *sge; + unsigned long flags; + int ret; + + /* Set WR ID to find mad_send_wr upon completion */ + qp_info = mad_send_wr->mad_agent_priv->qp_info; + mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; + mad_send_wr->mad_list.cqe.done = ib_mad_send_done; + mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe; + + mad_agent = mad_send_wr->send_buf.mad_agent; + sge = mad_send_wr->sg_list; + sge[0].addr = ib_dma_map_single(mad_agent->device, + mad_send_wr->send_buf.mad, + sge[0].length, + DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr))) + return -ENOMEM; + + mad_send_wr->header_mapping = sge[0].addr; + + sge[1].addr = ib_dma_map_single(mad_agent->device, + ib_get_payload(mad_send_wr), + sge[1].length, + DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) { + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->header_mapping, + sge[0].length, DMA_TO_DEVICE); + return -ENOMEM; + } + mad_send_wr->payload_mapping = sge[1].addr; + + spin_lock_irqsave(&qp_info->send_queue.lock, flags); + if (qp_info->send_queue.count < qp_info->send_queue.max_active) { + ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, + &bad_send_wr); + list = &qp_info->send_queue.list; + } else { + ret = 0; + list = &qp_info->overflow_list; + } + + if (!ret) { + qp_info->send_queue.count++; + list_add_tail(&mad_send_wr->mad_list.list, list); + } + spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); + if (ret) { + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->header_mapping, + sge[0].length, DMA_TO_DEVICE); + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->payload_mapping, + sge[1].length, DMA_TO_DEVICE); + } + return ret; +} + +/* + * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated + * with the registered client + */ +int ib_post_send_mad(struct ib_mad_send_buf *send_buf, + struct ib_mad_send_buf **bad_send_buf) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_buf *next_send_buf; + struct ib_mad_send_wr_private *mad_send_wr; + unsigned long flags; + int ret = -EINVAL; + + /* Walk list of send WRs and post each on send list */ + for (; send_buf; send_buf = next_send_buf) { + + mad_send_wr = container_of(send_buf, + struct ib_mad_send_wr_private, + send_buf); + mad_agent_priv = mad_send_wr->mad_agent_priv; + + if (!send_buf->mad_agent->send_handler || + (send_buf->timeout_ms && + !send_buf->mad_agent->recv_handler)) { + ret = -EINVAL; + goto error; + } + + if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) { + if (mad_agent_priv->agent.rmpp_version) { + ret = -EINVAL; + goto error; + } + } + + /* + * Save pointer to next work request to post in case the + * current one completes, and the user modifies the work + * request associated with the completion + */ + next_send_buf = send_buf->next; + mad_send_wr->send_wr.ah = 
send_buf->ah; + + if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + ret = handle_outgoing_dr_smp(mad_agent_priv, + mad_send_wr); + if (ret < 0) /* error */ + goto error; + else if (ret == 1) /* locally consumed */ + continue; + } + + mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; + /* Timeout will be updated after send completes */ + mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); + mad_send_wr->max_retries = send_buf->retries; + mad_send_wr->retries_left = send_buf->retries; + send_buf->retries = 0; + /* Reference for work request to QP + response */ + mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); + mad_send_wr->status = IB_WC_SUCCESS; + + /* Reference MAD agent until send completes */ + atomic_inc(&mad_agent_priv->refcount); + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_add_tail(&mad_send_wr->agent_list, + &mad_agent_priv->send_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { + ret = ib_send_rmpp_mad(mad_send_wr); + if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED) + ret = ib_send_mad(mad_send_wr); + } else + ret = ib_send_mad(mad_send_wr); + if (ret < 0) { + /* Fail send request */ + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_del(&mad_send_wr->agent_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + atomic_dec(&mad_agent_priv->refcount); + goto error; + } + } + return 0; +error: + if (bad_send_buf) + *bad_send_buf = send_buf; + return ret; +} +EXPORT_SYMBOL(ib_post_send_mad); + +/* + * ib_free_recv_mad - Returns data buffers used to receive + * a MAD to the access layer + */ +void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf; + struct ib_mad_private_header *mad_priv_hdr; + struct ib_mad_private *priv; + struct list_head free_list; + + INIT_LIST_HEAD(&free_list); + list_splice_init(&mad_recv_wc->rmpp_list, &free_list); + + list_for_each_entry_safe(mad_recv_buf, temp_recv_buf, + &free_list, list) { + mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc, + recv_buf); + mad_priv_hdr = container_of(mad_recv_wc, + struct ib_mad_private_header, + recv_wc); + priv = container_of(mad_priv_hdr, struct ib_mad_private, + header); + kfree(priv); + } +} +EXPORT_SYMBOL(ib_free_recv_mad); + +struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context) +{ + return ERR_PTR(-EINVAL); /* XXX: for now */ +} +EXPORT_SYMBOL(ib_redirect_mad_qp); + +int ib_process_mad_wc(struct ib_mad_agent *mad_agent, + struct ib_wc *wc) +{ + dev_err(&mad_agent->device->dev, + "ib_process_mad_wc() not implemented yet\n"); + return 0; +} +EXPORT_SYMBOL(ib_process_mad_wc); + +static int method_in_use(struct ib_mad_mgmt_method_table **method, + struct ib_mad_reg_req *mad_reg_req) +{ + int i; + + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) { + if ((*method)->agent[i]) { + pr_err("Method %d already in use\n", i); + return -EINVAL; + } + } + return 0; +} + +static int allocate_method_table(struct ib_mad_mgmt_method_table **method) +{ + /* Allocate management method table */ + *method = kzalloc(sizeof **method, GFP_ATOMIC); + if (!*method) { + pr_err("No memory for ib_mad_mgmt_method_table\n"); + return -ENOMEM; + } + + return 0; +} + +/* + * Check to see if there are any methods still in use + */ +static int check_method_table(struct 
ib_mad_mgmt_method_table *method) +{ + int i; + + for (i = 0; i < IB_MGMT_MAX_METHODS; i++) + if (method->agent[i]) + return 1; + return 0; +} + +/* + * Check to see if there are any method tables for this class still in use + */ +static int check_class_table(struct ib_mad_mgmt_class_table *class) +{ + int i; + + for (i = 0; i < MAX_MGMT_CLASS; i++) + if (class->method_table[i]) + return 1; + return 0; +} + +static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class) +{ + int i; + + for (i = 0; i < MAX_MGMT_OUI; i++) + if (vendor_class->method_table[i]) + return 1; + return 0; +} + +static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, + const char *oui) +{ + int i; + + for (i = 0; i < MAX_MGMT_OUI; i++) + /* Is there matching OUI for this vendor class ? */ + if (!memcmp(vendor_class->oui[i], oui, 3)) + return i; + + return -1; +} + +static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor) +{ + int i; + + for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++) + if (vendor->vendor_class[i]) + return 1; + + return 0; +} + +static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method, + struct ib_mad_agent_private *agent) +{ + int i; + + /* Remove any methods for this mad agent */ + for (i = 0; i < IB_MGMT_MAX_METHODS; i++) { + if (method->agent[i] == agent) { + method->agent[i] = NULL; + } + } +} + +static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, + struct ib_mad_agent_private *agent_priv, + u8 mgmt_class) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_mgmt_class_table **class; + struct ib_mad_mgmt_method_table **method; + int i, ret; + + port_priv = agent_priv->qp_info->port_priv; + class = &port_priv->version[mad_reg_req->mgmt_class_version].class; + if (!*class) { + /* Allocate management class table for "new" class version */ + *class = kzalloc(sizeof **class, GFP_ATOMIC); + if (!*class) { + dev_err(&agent_priv->agent.device->dev, + "No memory for ib_mad_mgmt_class_table\n"); + ret = -ENOMEM; + goto error1; + } + + /* Allocate method table for this management class */ + method = &(*class)->method_table[mgmt_class]; + if ((ret = allocate_method_table(method))) + goto error2; + } else { + method = &(*class)->method_table[mgmt_class]; + if (!*method) { + /* Allocate method table for this management class */ + if ((ret = allocate_method_table(method))) + goto error1; + } + } + + /* Now, make sure methods are not already in use */ + if (method_in_use(method, mad_reg_req)) + goto error3; + + /* Finally, add in methods being registered */ + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) + (*method)->agent[i] = agent_priv; + + return 0; + +error3: + /* Remove any methods for this mad agent */ + remove_methods_mad_agent(*method, agent_priv); + /* Now, check to see if there are any methods in use */ + if (!check_method_table(*method)) { + /* If not, release management method table */ + kfree(*method); + *method = NULL; + } + ret = -EINVAL; + goto error1; +error2: + kfree(*class); + *class = NULL; +error1: + return ret; +} + +static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, + struct ib_mad_agent_private *agent_priv) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_mgmt_vendor_class_table **vendor_table; + struct ib_mad_mgmt_vendor_class_table *vendor = NULL; + struct ib_mad_mgmt_vendor_class *vendor_class = NULL; + struct ib_mad_mgmt_method_table **method; + int i, ret = -ENOMEM; + u8 vclass; + + /* "New" vendor (with OUI) class */ + vclass = 
vendor_class_index(mad_reg_req->mgmt_class); + port_priv = agent_priv->qp_info->port_priv; + vendor_table = &port_priv->version[ + mad_reg_req->mgmt_class_version].vendor; + if (!*vendor_table) { + /* Allocate mgmt vendor class table for "new" class version */ + vendor = kzalloc(sizeof *vendor, GFP_ATOMIC); + if (!vendor) { + dev_err(&agent_priv->agent.device->dev, + "No memory for ib_mad_mgmt_vendor_class_table\n"); + goto error1; + } + + *vendor_table = vendor; + } + if (!(*vendor_table)->vendor_class[vclass]) { + /* Allocate table for this management vendor class */ + vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC); + if (!vendor_class) { + dev_err(&agent_priv->agent.device->dev, + "No memory for ib_mad_mgmt_vendor_class\n"); + goto error2; + } + + (*vendor_table)->vendor_class[vclass] = vendor_class; + } + for (i = 0; i < MAX_MGMT_OUI; i++) { + /* Is there matching OUI for this vendor class ? */ + if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i], + mad_reg_req->oui, 3)) { + method = &(*vendor_table)->vendor_class[ + vclass]->method_table[i]; + BUG_ON(!*method); + goto check_in_use; + } + } + for (i = 0; i < MAX_MGMT_OUI; i++) { + /* OUI slot available ? */ + if (!is_vendor_oui((*vendor_table)->vendor_class[ + vclass]->oui[i])) { + method = &(*vendor_table)->vendor_class[ + vclass]->method_table[i]; + BUG_ON(*method); + /* Allocate method table for this OUI */ + if ((ret = allocate_method_table(method))) + goto error3; + memcpy((*vendor_table)->vendor_class[vclass]->oui[i], + mad_reg_req->oui, 3); + goto check_in_use; + } + } + dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n"); + goto error3; + +check_in_use: + /* Now, make sure methods are not already in use */ + if (method_in_use(method, mad_reg_req)) + goto error4; + + /* Finally, add in methods being registered */ + for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) + (*method)->agent[i] = agent_priv; + + return 0; + +error4: + /* Remove any methods for this mad agent */ + remove_methods_mad_agent(*method, agent_priv); + /* Now, check to see if there are any methods in use */ + if (!check_method_table(*method)) { + /* If not, release management method table */ + kfree(*method); + *method = NULL; + } + ret = -EINVAL; +error3: + if (vendor_class) { + (*vendor_table)->vendor_class[vclass] = NULL; + kfree(vendor_class); + } +error2: + if (vendor) { + *vendor_table = NULL; + kfree(vendor); + } +error1: + return ret; +} + +static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv) +{ + struct ib_mad_port_private *port_priv; + struct ib_mad_mgmt_class_table *class; + struct ib_mad_mgmt_method_table *method; + struct ib_mad_mgmt_vendor_class_table *vendor; + struct ib_mad_mgmt_vendor_class *vendor_class; + int index; + u8 mgmt_class; + + /* + * Was MAD registration request supplied + * with original registration ? 
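+ * If not, no method tables were allocated on behalf of this
+ * agent and there is nothing to tear down.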
+ */ + if (!agent_priv->reg_req) { + goto out; + } + + port_priv = agent_priv->qp_info->port_priv; + mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class); + class = port_priv->version[ + agent_priv->reg_req->mgmt_class_version].class; + if (!class) + goto vendor_check; + + method = class->method_table[mgmt_class]; + if (method) { + /* Remove any methods for this mad agent */ + remove_methods_mad_agent(method, agent_priv); + /* Now, check to see if there are any methods still in use */ + if (!check_method_table(method)) { + /* If not, release management method table */ + kfree(method); + class->method_table[mgmt_class] = NULL; + /* Any management classes left ? */ + if (!check_class_table(class)) { + /* If not, release management class table */ + kfree(class); + port_priv->version[ + agent_priv->reg_req-> + mgmt_class_version].class = NULL; + } + } + } + +vendor_check: + if (!is_vendor_class(mgmt_class)) + goto out; + + /* normalize mgmt_class to vendor range 2 */ + mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class); + vendor = port_priv->version[ + agent_priv->reg_req->mgmt_class_version].vendor; + + if (!vendor) + goto out; + + vendor_class = vendor->vendor_class[mgmt_class]; + if (vendor_class) { + index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui); + if (index < 0) + goto out; + method = vendor_class->method_table[index]; + if (method) { + /* Remove any methods for this mad agent */ + remove_methods_mad_agent(method, agent_priv); + /* + * Now, check to see if there are + * any methods still in use + */ + if (!check_method_table(method)) { + /* If not, release management method table */ + kfree(method); + vendor_class->method_table[index] = NULL; + memset(vendor_class->oui[index], 0, 3); + /* Any OUIs left ? */ + if (!check_vendor_class(vendor_class)) { + /* If not, release vendor class table */ + kfree(vendor_class); + vendor->vendor_class[mgmt_class] = NULL; + /* Any other vendor classes left ? */ + if (!check_vendor_table(vendor)) { + kfree(vendor); + port_priv->version[ + agent_priv->reg_req-> + mgmt_class_version]. + vendor = NULL; + } + } + } + } + } + +out: + return; +} + +static struct ib_mad_agent_private * +find_mad_agent(struct ib_mad_port_private *port_priv, + const struct ib_mad_hdr *mad_hdr) +{ + struct ib_mad_agent_private *mad_agent = NULL; + unsigned long flags; + + spin_lock_irqsave(&port_priv->reg_lock, flags); + if (ib_response_mad(mad_hdr)) { + u32 hi_tid; + struct ib_mad_agent_private *entry; + + /* + * Routing is based on high 32 bits of transaction ID + * of MAD. 
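+ * Each agent was assigned a unique hi_tid at registration time,
+ * so a response is routed back to the agent that issued the
+ * request.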
+ */ + hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; + list_for_each_entry(entry, &port_priv->agent_list, agent_list) { + if (entry->agent.hi_tid == hi_tid) { + mad_agent = entry; + break; + } + } + } else { + struct ib_mad_mgmt_class_table *class; + struct ib_mad_mgmt_method_table *method; + struct ib_mad_mgmt_vendor_class_table *vendor; + struct ib_mad_mgmt_vendor_class *vendor_class; + const struct ib_vendor_mad *vendor_mad; + int index; + + /* + * Routing is based on version, class, and method + * For "newer" vendor MADs, also based on OUI + */ + if (mad_hdr->class_version >= MAX_MGMT_VERSION) + goto out; + if (!is_vendor_class(mad_hdr->mgmt_class)) { + class = port_priv->version[ + mad_hdr->class_version].class; + if (!class) + goto out; + if (convert_mgmt_class(mad_hdr->mgmt_class) >= + IB_MGMT_MAX_METHODS) + goto out; + method = class->method_table[convert_mgmt_class( + mad_hdr->mgmt_class)]; + if (method) + mad_agent = method->agent[mad_hdr->method & + ~IB_MGMT_METHOD_RESP]; + } else { + vendor = port_priv->version[ + mad_hdr->class_version].vendor; + if (!vendor) + goto out; + vendor_class = vendor->vendor_class[vendor_class_index( + mad_hdr->mgmt_class)]; + if (!vendor_class) + goto out; + /* Find matching OUI */ + vendor_mad = (const struct ib_vendor_mad *)mad_hdr; + index = find_vendor_oui(vendor_class, vendor_mad->oui); + if (index == -1) + goto out; + method = vendor_class->method_table[index]; + if (method) { + mad_agent = method->agent[mad_hdr->method & + ~IB_MGMT_METHOD_RESP]; + } + } + } + + if (mad_agent) { + if (mad_agent->agent.recv_handler) + atomic_inc(&mad_agent->refcount); + else { + dev_notice(&port_priv->device->dev, + "No receive handler for client %p on port %d\n", + &mad_agent->agent, port_priv->port_num); + mad_agent = NULL; + } + } +out: + spin_unlock_irqrestore(&port_priv->reg_lock, flags); + + return mad_agent; +} + +static int validate_mad(const struct ib_mad_hdr *mad_hdr, + const struct ib_mad_qp_info *qp_info, + bool opa) +{ + int valid = 0; + u32 qp_num = qp_info->qp->qp_num; + + /* Make sure MAD base version is understood */ + if (mad_hdr->base_version != IB_MGMT_BASE_VERSION && + (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) { + pr_err("MAD received with unsupported base version %d %s\n", + mad_hdr->base_version, opa ? 
"(opa)" : ""); + goto out; + } + + /* Filter SMI packets sent to other than QP0 */ + if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || + (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + if (qp_num == 0) + valid = 1; + } else { + /* CM attributes other than ClassPortInfo only use Send method */ + if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) && + (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) && + (mad_hdr->method != IB_MGMT_METHOD_SEND)) + goto out; + /* Filter GSI packets sent to QP0 */ + if (qp_num != 0) + valid = 1; + } + +out: + return valid; +} + +static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_hdr *mad_hdr) +{ + const struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (const struct ib_rmpp_mad *)mad_hdr; + return !mad_agent_priv->agent.rmpp_version || + !ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) || + !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE) || + (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); +} + +static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc) +{ + return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class == + rwc->recv_buf.mad->mad_hdr.mgmt_class; +} + +static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc ) +{ + struct ib_ah_attr attr; + u8 send_resp, rcv_resp; + union ib_gid sgid; + struct ib_device *device = mad_agent_priv->agent.device; + u8 port_num = mad_agent_priv->agent.port_num; + u8 lmc; + + send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad); + rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr); + + if (send_resp == rcv_resp) + /* both requests, or both responses. GIDs different */ + return 0; + + if (ib_query_ah(wr->send_buf.ah, &attr)) + /* Assume not equal, to avoid false positives. */ + return 0; + + if (!!(attr.ah_flags & IB_AH_GRH) != + !!(rwc->wc->wc_flags & IB_WC_GRH)) + /* one has GID, other does not. Assume different */ + return 0; + + if (!send_resp && rcv_resp) { + /* is request/response. */ + if (!(attr.ah_flags & IB_AH_GRH)) { + if (ib_get_cached_lmc(device, port_num, &lmc)) + return 0; + return (!lmc || !((attr.src_path_bits ^ + rwc->wc->dlid_path_bits) & + ((1 << lmc) - 1))); + } else { + if (ib_get_cached_gid(device, port_num, + attr.grh.sgid_index, &sgid, NULL)) + return 0; + return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, + 16); + } + } + + if (!(attr.ah_flags & IB_AH_GRH)) + return attr.dlid == rwc->wc->slid; + else + return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw, + 16); +} + +static inline int is_direct(u8 class) +{ + return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE); +} + +struct ib_mad_send_wr_private* +ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_recv_wc *wc) +{ + struct ib_mad_send_wr_private *wr; + const struct ib_mad_hdr *mad_hdr; + + mad_hdr = &wc->recv_buf.mad->mad_hdr; + + list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) { + if ((wr->tid == mad_hdr->tid) && + rcv_has_same_class(wr, wc) && + /* + * Don't check GID for direct routed MADs. + * These might have permissive LIDs. + */ + (is_direct(mad_hdr->mgmt_class) || + rcv_has_same_gid(mad_agent_priv, wr, wc))) + return (wr->status == IB_WC_SUCCESS) ? 
wr : NULL; + } + + /* + * It's possible to receive the response before we've + * been notified that the send has completed + */ + list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) { + if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) && + wr->tid == mad_hdr->tid && + wr->timeout && + rcv_has_same_class(wr, wc) && + /* + * Don't check GID for direct routed MADs. + * These might have permissive LIDs. + */ + (is_direct(mad_hdr->mgmt_class) || + rcv_has_same_gid(mad_agent_priv, wr, wc))) + /* Verify request has not been canceled */ + return (wr->status == IB_WC_SUCCESS) ? wr : NULL; + } + return NULL; +} + +void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr) +{ + mad_send_wr->timeout = 0; + if (mad_send_wr->refcount == 1) + list_move_tail(&mad_send_wr->agent_list, + &mad_send_wr->mad_agent_priv->done_list); +} + +static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wc mad_send_wc; + unsigned long flags; + + INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); + list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { + mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, + mad_recv_wc); + if (!mad_recv_wc) { + deref_mad_agent(mad_agent_priv); + return; + } + } + + /* Complete corresponding request */ + if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) { + spin_lock_irqsave(&mad_agent_priv->lock, flags); + mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); + if (!mad_send_wr) { + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) + && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class) + && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr) + & IB_MGMT_RMPP_FLAG_ACTIVE)) { + /* user rmpp is in effect + * and this is an active RMPP MAD + */ + mad_agent_priv->agent.recv_handler( + &mad_agent_priv->agent, NULL, + mad_recv_wc); + atomic_dec(&mad_agent_priv->refcount); + } else { + /* not user rmpp, revert to normal behavior and + * drop the mad */ + ib_free_recv_mad(mad_recv_wc); + deref_mad_agent(mad_agent_priv); + return; + } + } else { + ib_mark_mad_done(mad_send_wr); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + /* Defined behavior is to complete response before request */ + mad_agent_priv->agent.recv_handler( + &mad_agent_priv->agent, + &mad_send_wr->send_buf, + mad_recv_wc); + atomic_dec(&mad_agent_priv->refcount); + + mad_send_wc.status = IB_WC_SUCCESS; + mad_send_wc.vendor_err = 0; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); + } + } else { + mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL, + mad_recv_wc); + deref_mad_agent(mad_agent_priv); + } +} + +static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv, + const struct ib_mad_qp_info *qp_info, + const struct ib_wc *wc, + int port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + enum smi_forward_action retsmi; + struct ib_smp *smp = (struct ib_smp *)recv->mad; + + if (smi_handle_dr_smp_recv(smp, + rdma_cap_ib_switch(port_priv->device), + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + retsmi = smi_check_forward_dr_smp(smp); + if (retsmi == IB_SMI_LOCAL) + return IB_SMI_HANDLE; + + if (retsmi == IB_SMI_SEND) { /* don't 
forward */ + if (smi_handle_dr_smp_send(smp, + rdma_cap_ib_switch(port_priv->device), + port_num) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + } else if (rdma_cap_ib_switch(port_priv->device)) { + /* forward case for switches */ + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + agent_send_response((const struct ib_mad_hdr *)response->mad, + &response->grh, wc, + port_priv->device, + smi_get_fwd_port(smp), + qp_info->qp->qp_num, + response->mad_size, + false); + + return IB_SMI_DISCARD; + } + return IB_SMI_HANDLE; +} + +static bool generate_unmatched_resp(const struct ib_mad_private *recv, + struct ib_mad_private *response, + size_t *resp_len, bool opa) +{ + const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad; + struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad; + + if (recv_hdr->method == IB_MGMT_METHOD_GET || + recv_hdr->method == IB_MGMT_METHOD_SET) { + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + resp_hdr->method = IB_MGMT_METHOD_GET_RESP; + resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); + if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + resp_hdr->status |= IB_SMP_DIRECTION; + + if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) { + if (recv_hdr->mgmt_class == + IB_MGMT_CLASS_SUBN_LID_ROUTED || + recv_hdr->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + *resp_len = opa_get_smp_header_size( + (const struct opa_smp *)recv->mad); + else + *resp_len = sizeof(struct ib_mad_hdr); + } + + return true; + } else { + return false; + } +} + +static enum smi_action +handle_opa_smi(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info, + struct ib_wc *wc, + int port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + enum smi_forward_action retsmi; + struct opa_smp *smp = (struct opa_smp *)recv->mad; + + if (opa_smi_handle_dr_smp_recv(smp, + rdma_cap_ib_switch(port_priv->device), + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + retsmi = opa_smi_check_forward_dr_smp(smp); + if (retsmi == IB_SMI_LOCAL) + return IB_SMI_HANDLE; + + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (opa_smi_handle_dr_smp_send(smp, + rdma_cap_ib_switch(port_priv->device), + port_num) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + if (opa_smi_check_local_smp(smp, port_priv->device) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + } else if (rdma_cap_ib_switch(port_priv->device)) { + /* forward case for switches */ + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.opa_mad = + (struct opa_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + agent_send_response((const struct ib_mad_hdr *)response->mad, + &response->grh, wc, + port_priv->device, + opa_smi_get_fwd_port(smp), + qp_info->qp->qp_num, + recv->header.wc.byte_len, + true); + + return IB_SMI_DISCARD; + } + + return IB_SMI_HANDLE; +} + +static enum smi_action +handle_smi(struct 
ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info, + struct ib_wc *wc, + int port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response, + bool opa) +{ + struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad; + + if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION && + mad_hdr->class_version == OPA_SMI_CLASS_VERSION) + return handle_opa_smi(port_priv, qp_info, wc, port_num, recv, + response); + + return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response); +} + +static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_mad_port_private *port_priv = cq->cq_context; + struct ib_mad_list_head *mad_list = + container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); + struct ib_mad_qp_info *qp_info; + struct ib_mad_private_header *mad_priv_hdr; + struct ib_mad_private *recv, *response = NULL; + struct ib_mad_agent_private *mad_agent; + int port_num; + int ret = IB_MAD_RESULT_SUCCESS; + size_t mad_size; + u16 resp_mad_pkey_index = 0; + bool opa; + + if (list_empty_careful(&port_priv->port_list)) + return; + + if (wc->status != IB_WC_SUCCESS) { + /* + * Receive errors indicate that the QP has entered the error + * state - error handling/shutdown code will cleanup + */ + return; + } + + qp_info = mad_list->mad_queue->qp_info; + dequeue_mad(mad_list); + + opa = rdma_cap_opa_mad(qp_info->port_priv->device, + qp_info->port_priv->port_num); + + mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, + mad_list); + recv = container_of(mad_priv_hdr, struct ib_mad_private, header); + ib_dma_unmap_single(port_priv->device, + recv->header.mapping, + mad_priv_dma_size(recv), + DMA_FROM_DEVICE); + + /* Setup MAD receive work completion from "normal" work completion */ + recv->header.wc = *wc; + recv->header.recv_wc.wc = &recv->header.wc; + + if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) { + recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh); + recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); + } else { + recv->header.recv_wc.mad_len = sizeof(struct ib_mad); + recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); + } + + recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad; + recv->header.recv_wc.recv_buf.grh = &recv->grh; + + if (atomic_read(&qp_info->snoop_count)) + snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS); + + /* Validate MAD */ + if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) + goto out; + + mad_size = recv->mad_size; + response = alloc_mad_private(mad_size, GFP_KERNEL); + if (!response) { + dev_err(&port_priv->device->dev, + "%s: no memory for response buffer\n", __func__); + goto out; + } + + if (rdma_cap_ib_switch(port_priv->device)) + port_num = wc->port_num; + else + port_num = port_priv->port_num; + + if (((struct ib_mad_hdr *)recv->mad)->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (handle_smi(port_priv, qp_info, wc, port_num, recv, + response, opa) + == IB_SMI_DISCARD) + goto out; + } + + /* Give driver "right of first refusal" on incoming MAD */ + if (port_priv->device->process_mad) { + ret = port_priv->device->process_mad(port_priv->device, 0, + port_priv->port_num, + wc, &recv->grh, + (const struct ib_mad_hdr *)recv->mad, + recv->mad_size, + (struct ib_mad_hdr *)response->mad, + &mad_size, &resp_mad_pkey_index); + + if (opa) + wc->pkey_index = resp_mad_pkey_index; + + if (ret & IB_MAD_RESULT_SUCCESS) { + if (ret & IB_MAD_RESULT_CONSUMED) + goto out; + if (ret & 
IB_MAD_RESULT_REPLY) { + agent_send_response((const struct ib_mad_hdr *)response->mad, + &recv->grh, wc, + port_priv->device, + port_num, + qp_info->qp->qp_num, + mad_size, opa); + goto out; + } + } + } + + mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad); + if (mad_agent) { + ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); + /* + * recv is freed up in error cases in ib_mad_complete_recv + * or via recv_handler in ib_mad_complete_recv() + */ + recv = NULL; + } else if ((ret & IB_MAD_RESULT_SUCCESS) && + generate_unmatched_resp(recv, response, &mad_size, opa)) { + agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, + port_priv->device, port_num, + qp_info->qp->qp_num, mad_size, opa); + } + +out: + /* Post another receive request for this QP */ + if (response) { + ib_mad_post_receive_mads(qp_info, response); + kfree(recv); + } else + ib_mad_post_receive_mads(qp_info, recv); +} + +static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) +{ + struct ib_mad_send_wr_private *mad_send_wr; + unsigned long delay; + + if (list_empty(&mad_agent_priv->wait_list)) { + cancel_delayed_work(&mad_agent_priv->timed_work); + } else { + mad_send_wr = list_entry(mad_agent_priv->wait_list.next, + struct ib_mad_send_wr_private, + agent_list); + + if (time_after(mad_agent_priv->timeout, + mad_send_wr->timeout)) { + mad_agent_priv->timeout = mad_send_wr->timeout; + delay = mad_send_wr->timeout - jiffies; + if ((long)delay <= 0) + delay = 1; + mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, delay); + } + } +} + +static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *temp_mad_send_wr; + struct list_head *list_item; + unsigned long delay; + + mad_agent_priv = mad_send_wr->mad_agent_priv; + list_del(&mad_send_wr->agent_list); + + delay = mad_send_wr->timeout; + mad_send_wr->timeout += jiffies; + + if (delay) { + list_for_each_prev(list_item, &mad_agent_priv->wait_list) { + temp_mad_send_wr = list_entry(list_item, + struct ib_mad_send_wr_private, + agent_list); + if (time_after(mad_send_wr->timeout, + temp_mad_send_wr->timeout)) + break; + } + } + else + list_item = &mad_agent_priv->wait_list; + list_add(&mad_send_wr->agent_list, list_item); + + /* Reschedule a work item if we have a shorter timeout */ + if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) + mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, delay); +} + +void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, + int timeout_ms) +{ + mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); + wait_for_response(mad_send_wr); +} + +/* + * Process a send work completion + */ +void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc) +{ + struct ib_mad_agent_private *mad_agent_priv; + unsigned long flags; + int ret; + + mad_agent_priv = mad_send_wr->mad_agent_priv; + spin_lock_irqsave(&mad_agent_priv->lock, flags); + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { + ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc); + if (ret == IB_RMPP_RESULT_CONSUMED) + goto done; + } else + ret = IB_RMPP_RESULT_UNHANDLED; + + if (mad_send_wc->status != IB_WC_SUCCESS && + mad_send_wr->status == IB_WC_SUCCESS) { + mad_send_wr->status = mad_send_wc->status; + mad_send_wr->refcount -= (mad_send_wr->timeout > 0); + } + + if 
(--mad_send_wr->refcount > 0) { + if (mad_send_wr->refcount == 1 && mad_send_wr->timeout && + mad_send_wr->status == IB_WC_SUCCESS) { + wait_for_response(mad_send_wr); + } + goto done; + } + + /* Remove send from MAD agent and notify client of completion */ + list_del(&mad_send_wr->agent_list); + adjust_timeout(mad_agent_priv); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + if (mad_send_wr->status != IB_WC_SUCCESS ) + mad_send_wc->status = mad_send_wr->status; + if (ret == IB_RMPP_RESULT_INTERNAL) + ib_rmpp_send_handler(mad_send_wc); + else + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + mad_send_wc); + + /* Release reference on agent taken when sending */ + deref_mad_agent(mad_agent_priv); + return; +done: + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); +} + +static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_mad_port_private *port_priv = cq->cq_context; + struct ib_mad_list_head *mad_list = + container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); + struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr; + struct ib_mad_qp_info *qp_info; + struct ib_mad_queue *send_queue; + struct ib_send_wr *bad_send_wr; + struct ib_mad_send_wc mad_send_wc; + unsigned long flags; + int ret; + + if (list_empty_careful(&port_priv->port_list)) + return; + + if (wc->status != IB_WC_SUCCESS) { + if (!ib_mad_send_error(port_priv, wc)) + return; + } + + mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, + mad_list); + send_queue = mad_list->mad_queue; + qp_info = send_queue->qp_info; + +retry: + ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, + mad_send_wr->header_mapping, + mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); + ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, + mad_send_wr->payload_mapping, + mad_send_wr->sg_list[1].length, DMA_TO_DEVICE); + queued_send_wr = NULL; + spin_lock_irqsave(&send_queue->lock, flags); + list_del(&mad_list->list); + + /* Move queued send to the send queue */ + if (send_queue->count-- > send_queue->max_active) { + mad_list = container_of(qp_info->overflow_list.next, + struct ib_mad_list_head, list); + queued_send_wr = container_of(mad_list, + struct ib_mad_send_wr_private, + mad_list); + list_move_tail(&mad_list->list, &send_queue->list); + } + spin_unlock_irqrestore(&send_queue->lock, flags); + + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_send_wc.status = wc->status; + mad_send_wc.vendor_err = wc->vendor_err; + if (atomic_read(&qp_info->snoop_count)) + snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc, + IB_MAD_SNOOP_SEND_COMPLETIONS); + ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); + + if (queued_send_wr) { + ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, + &bad_send_wr); + if (ret) { + dev_err(&port_priv->device->dev, + "ib_post_send failed: %d\n", ret); + mad_send_wr = queued_send_wr; + wc->status = IB_WC_LOC_QP_OP_ERR; + goto retry; + } + } +} + +static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_list_head *mad_list; + unsigned long flags; + + spin_lock_irqsave(&qp_info->send_queue.lock, flags); + list_for_each_entry(mad_list, &qp_info->send_queue.list, list) { + mad_send_wr = container_of(mad_list, + struct ib_mad_send_wr_private, + mad_list); + mad_send_wr->retry = 1; + } + spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); +} + +static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, + struct ib_wc *wc) +{ + struct 
ib_mad_list_head *mad_list = + container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); + struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info; + struct ib_mad_send_wr_private *mad_send_wr; + int ret; + + /* + * Send errors will transition the QP to SQE - move + * QP to RTS and repost flushed work requests + */ + mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, + mad_list); + if (wc->status == IB_WC_WR_FLUSH_ERR) { + if (mad_send_wr->retry) { + /* Repost send */ + struct ib_send_wr *bad_send_wr; + + mad_send_wr->retry = 0; + ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, + &bad_send_wr); + if (!ret) + return false; + } + } else { + struct ib_qp_attr *attr; + + /* Transition QP to RTS and fail offending send */ + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (attr) { + attr->qp_state = IB_QPS_RTS; + attr->cur_qp_state = IB_QPS_SQE; + ret = ib_modify_qp(qp_info->qp, attr, + IB_QP_STATE | IB_QP_CUR_STATE); + kfree(attr); + if (ret) + dev_err(&port_priv->device->dev, + "%s - ib_modify_qp to RTS: %d\n", + __func__, ret); + else + mark_sends_for_retry(qp_info); + } + } + + return true; +} + +static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) +{ + unsigned long flags; + struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr; + struct ib_mad_send_wc mad_send_wc; + struct list_head cancel_list; + + INIT_LIST_HEAD(&cancel_list); + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, + &mad_agent_priv->send_list, agent_list) { + if (mad_send_wr->status == IB_WC_SUCCESS) { + mad_send_wr->status = IB_WC_WR_FLUSH_ERR; + mad_send_wr->refcount -= (mad_send_wr->timeout > 0); + } + } + + /* Empty wait list to prevent receives from finding a request */ + list_splice_init(&mad_agent_priv->wait_list, &cancel_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + /* Report all cancelled requests */ + mad_send_wc.status = IB_WC_WR_FLUSH_ERR; + mad_send_wc.vendor_err = 0; + + list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, + &cancel_list, agent_list) { + mad_send_wc.send_buf = &mad_send_wr->send_buf; + list_del(&mad_send_wr->agent_list); + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + &mad_send_wc); + atomic_dec(&mad_agent_priv->refcount); + } +} + +static struct ib_mad_send_wr_private* +find_send_wr(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_buf *send_buf) +{ + struct ib_mad_send_wr_private *mad_send_wr; + + list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, + agent_list) { + if (&mad_send_wr->send_buf == send_buf) + return mad_send_wr; + } + + list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, + agent_list) { + if (is_rmpp_data_mad(mad_agent_priv, + mad_send_wr->send_buf.mad) && + &mad_send_wr->send_buf == send_buf) + return mad_send_wr; + } + return NULL; +} + +int ib_modify_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, u32 timeout_ms) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; + unsigned long flags; + int active; + + mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, + agent); + spin_lock_irqsave(&mad_agent_priv->lock, flags); + mad_send_wr = find_send_wr(mad_agent_priv, send_buf); + if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + return -EINVAL; + } + + active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1); + if (!timeout_ms) { + 
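+		/*
+		 * A timeout of zero is the cancel request: the send is
+		 * marked IB_WC_WR_FLUSH_ERR here and the regular
+		 * completion path reports it to the client, so
+		 * ib_cancel_mad() below is just this call with 0.
+		 * Typical client-side usage, where agent and send_buf
+		 * stand for an outstanding request:
+		 *
+		 *	ib_cancel_mad(agent, send_buf);
+		 *
+		 *	if (ib_modify_mad(agent, send_buf, 5000))
+		 *		pr_debug("request already completed\n");
+		 *
+		 * Either way the send_handler still runs exactly once
+		 * per buffer, so buffer-ownership rules are unchanged.
+		 */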
mad_send_wr->status = IB_WC_WR_FLUSH_ERR; + mad_send_wr->refcount -= (mad_send_wr->timeout > 0); + } + + mad_send_wr->send_buf.timeout_ms = timeout_ms; + if (active) + mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); + else + ib_reset_mad_timeout(mad_send_wr, timeout_ms); + + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + return 0; +} +EXPORT_SYMBOL(ib_modify_mad); + +void ib_cancel_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf) +{ + ib_modify_mad(mad_agent, send_buf, 0); +} +EXPORT_SYMBOL(ib_cancel_mad); + +static void local_completions(struct work_struct *work) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_local_private *local; + struct ib_mad_agent_private *recv_mad_agent; + unsigned long flags; + int free_mad; + struct ib_wc wc; + struct ib_mad_send_wc mad_send_wc; + bool opa; + + mad_agent_priv = + container_of(work, struct ib_mad_agent_private, local_work); + + opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, + mad_agent_priv->qp_info->port_priv->port_num); + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + while (!list_empty(&mad_agent_priv->local_list)) { + local = list_entry(mad_agent_priv->local_list.next, + struct ib_mad_local_private, + completion_list); + list_del(&local->completion_list); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + free_mad = 0; + if (local->mad_priv) { + u8 base_version; + recv_mad_agent = local->recv_mad_agent; + if (!recv_mad_agent) { + dev_err(&mad_agent_priv->agent.device->dev, + "No receive MAD agent for local completion\n"); + free_mad = 1; + goto local_send_completion; + } + + /* + * Defined behavior is to complete response + * before request + */ + build_smp_wc(recv_mad_agent->agent.qp, + local->mad_send_wr->send_wr.wr.wr_cqe, + be16_to_cpu(IB_LID_PERMISSIVE), + local->mad_send_wr->send_wr.pkey_index, + recv_mad_agent->agent.port_num, &wc); + + local->mad_priv->header.recv_wc.wc = &wc; + + base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version; + if (opa && base_version == OPA_MGMT_BASE_VERSION) { + local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len; + local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); + } else { + local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad); + local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); + } + + INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list); + list_add(&local->mad_priv->header.recv_wc.recv_buf.list, + &local->mad_priv->header.recv_wc.rmpp_list); + local->mad_priv->header.recv_wc.recv_buf.grh = NULL; + local->mad_priv->header.recv_wc.recv_buf.mad = + (struct ib_mad *)local->mad_priv->mad; + if (atomic_read(&recv_mad_agent->qp_info->snoop_count)) + snoop_recv(recv_mad_agent->qp_info, + &local->mad_priv->header.recv_wc, + IB_MAD_SNOOP_RECVS); + recv_mad_agent->agent.recv_handler( + &recv_mad_agent->agent, + &local->mad_send_wr->send_buf, + &local->mad_priv->header.recv_wc); + spin_lock_irqsave(&recv_mad_agent->lock, flags); + atomic_dec(&recv_mad_agent->refcount); + spin_unlock_irqrestore(&recv_mad_agent->lock, flags); + } + +local_send_completion: + /* Complete send */ + mad_send_wc.status = IB_WC_SUCCESS; + mad_send_wc.vendor_err = 0; + mad_send_wc.send_buf = &local->mad_send_wr->send_buf; + if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) + snoop_send(mad_agent_priv->qp_info, + &local->mad_send_wr->send_buf, + &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + 
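+	/*
+	 * The loop above services MADs that were addressed to the local
+	 * port: nothing is posted to hardware.  A work completion is
+	 * synthesized instead (build_smp_wc()), the destination agent's
+	 * recv_handler is invoked directly, and only then does the
+	 * sender's send_handler run - the response completes before the
+	 * request, as noted above.  In handler terms, roughly:
+	 *
+	 *	recv_agent->agent.recv_handler(&recv_agent->agent,
+	 *				       &send_wr->send_buf, &recv_wc);
+	 *	send_agent->agent.send_handler(&send_agent->agent,
+	 *				       &send_wc);
+	 *
+	 * (recv_agent/send_agent name the two sides of the loopback.)
+	 */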
&mad_send_wc); + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + atomic_dec(&mad_agent_priv->refcount); + if (free_mad) + kfree(local->mad_priv); + kfree(local); + } + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); +} + +static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) +{ + int ret; + + if (!mad_send_wr->retries_left) + return -ETIMEDOUT; + + mad_send_wr->retries_left--; + mad_send_wr->send_buf.retries++; + + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); + + if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) { + ret = ib_retry_rmpp(mad_send_wr); + switch (ret) { + case IB_RMPP_RESULT_UNHANDLED: + ret = ib_send_mad(mad_send_wr); + break; + case IB_RMPP_RESULT_CONSUMED: + ret = 0; + break; + default: + ret = -ECOMM; + break; + } + } else + ret = ib_send_mad(mad_send_wr); + + if (!ret) { + mad_send_wr->refcount++; + list_add_tail(&mad_send_wr->agent_list, + &mad_send_wr->mad_agent_priv->send_list); + } + return ret; +} + +static void timeout_sends(struct work_struct *work) +{ + struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wc mad_send_wc; + unsigned long flags, delay; + + mad_agent_priv = container_of(work, struct ib_mad_agent_private, + timed_work.work); + mad_send_wc.vendor_err = 0; + + spin_lock_irqsave(&mad_agent_priv->lock, flags); + while (!list_empty(&mad_agent_priv->wait_list)) { + mad_send_wr = list_entry(mad_agent_priv->wait_list.next, + struct ib_mad_send_wr_private, + agent_list); + + if (time_after(mad_send_wr->timeout, jiffies)) { + delay = mad_send_wr->timeout - jiffies; + if ((long)delay <= 0) + delay = 1; + queue_delayed_work(mad_agent_priv->qp_info-> + port_priv->wq, + &mad_agent_priv->timed_work, delay); + break; + } + + list_del(&mad_send_wr->agent_list); + if (mad_send_wr->status == IB_WC_SUCCESS && + !retry_send(mad_send_wr)) + continue; + + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + + if (mad_send_wr->status == IB_WC_SUCCESS) + mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; + else + mad_send_wc.status = mad_send_wr->status; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, + &mad_send_wc); + + atomic_dec(&mad_agent_priv->refcount); + spin_lock_irqsave(&mad_agent_priv->lock, flags); + } + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); +} + +/* + * Allocate receive MADs and post receive WRs for them + */ +static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, + struct ib_mad_private *mad) +{ + unsigned long flags; + int post, ret; + struct ib_mad_private *mad_priv; + struct ib_sge sg_list; + struct ib_recv_wr recv_wr, *bad_recv_wr; + struct ib_mad_queue *recv_queue = &qp_info->recv_queue; + + /* Initialize common scatter list fields */ + sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey; + + /* Initialize common receive WR fields */ + recv_wr.next = NULL; + recv_wr.sg_list = &sg_list; + recv_wr.num_sge = 1; + + do { + /* Allocate and map receive buffer */ + if (mad) { + mad_priv = mad; + mad = NULL; + } else { + mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv), + GFP_ATOMIC); + if (!mad_priv) { + dev_err(&qp_info->port_priv->device->dev, + "No memory for receive buffer\n"); + ret = -ENOMEM; + break; + } + } + sg_list.length = mad_priv_dma_size(mad_priv); + sg_list.addr = ib_dma_map_single(qp_info->port_priv->device, + &mad_priv->grh, + mad_priv_dma_size(mad_priv), + DMA_FROM_DEVICE); + if 
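+		/*
+		 * timeout_sends() above is the usual drain of a
+		 * deadline-sorted list: because the wait list is ordered
+		 * by expiry, the first unexpired entry ends the scan and
+		 * its remaining delay re-arms the delayed work.  Skeleton,
+		 * with a placeholder entry type:
+		 *
+		 *	while (!list_empty(head)) {
+		 *		e = list_first_entry(head, struct ent, list);
+		 *		if (time_after(e->timeout, jiffies)) {
+		 *			queue_delayed_work(wq, &dwork,
+		 *					   e->timeout - jiffies);
+		 *			break;
+		 *		}
+		 *		list_del(&e->list);
+		 *		expire(e);
+		 *	}
+		 */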
(unlikely(ib_dma_mapping_error(qp_info->port_priv->device, + sg_list.addr))) { + ret = -ENOMEM; + break; + } + mad_priv->header.mapping = sg_list.addr; + mad_priv->header.mad_list.mad_queue = recv_queue; + mad_priv->header.mad_list.cqe.done = ib_mad_recv_done; + recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe; + + /* Post receive WR */ + spin_lock_irqsave(&recv_queue->lock, flags); + post = (++recv_queue->count < recv_queue->max_active); + list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list); + spin_unlock_irqrestore(&recv_queue->lock, flags); + ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr); + if (ret) { + spin_lock_irqsave(&recv_queue->lock, flags); + list_del(&mad_priv->header.mad_list.list); + recv_queue->count--; + spin_unlock_irqrestore(&recv_queue->lock, flags); + ib_dma_unmap_single(qp_info->port_priv->device, + mad_priv->header.mapping, + mad_priv_dma_size(mad_priv), + DMA_FROM_DEVICE); + kfree(mad_priv); + dev_err(&qp_info->port_priv->device->dev, + "ib_post_recv failed: %d\n", ret); + break; + } + } while (post); + + return ret; +} + +/* + * Return all the posted receive MADs + */ +static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) +{ + struct ib_mad_private_header *mad_priv_hdr; + struct ib_mad_private *recv; + struct ib_mad_list_head *mad_list; + + if (!qp_info->qp) + return; + + while (!list_empty(&qp_info->recv_queue.list)) { + + mad_list = list_entry(qp_info->recv_queue.list.next, + struct ib_mad_list_head, list); + mad_priv_hdr = container_of(mad_list, + struct ib_mad_private_header, + mad_list); + recv = container_of(mad_priv_hdr, struct ib_mad_private, + header); + + /* Remove from posted receive MAD list */ + list_del(&mad_list->list); + + ib_dma_unmap_single(qp_info->port_priv->device, + recv->header.mapping, + mad_priv_dma_size(recv), + DMA_FROM_DEVICE); + kfree(recv); + } + + qp_info->recv_queue.count = 0; +} + +/* + * Start the port + */ +static int ib_mad_port_start(struct ib_mad_port_private *port_priv) +{ + int ret, i; + struct ib_qp_attr *attr; + struct ib_qp *qp; + u16 pkey_index; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) { + dev_err(&port_priv->device->dev, + "Couldn't kmalloc ib_qp_attr\n"); + return -ENOMEM; + } + + ret = ib_find_pkey(port_priv->device, port_priv->port_num, + IB_DEFAULT_PKEY_FULL, &pkey_index); + if (ret) + pkey_index = 0; + + for (i = 0; i < IB_MAD_QPS_CORE; i++) { + qp = port_priv->qp_info[i].qp; + if (!qp) + continue; + + /* + * PKey index for QP1 is irrelevant but + * one is needed for the Reset to Init transition + */ + attr->qp_state = IB_QPS_INIT; + attr->pkey_index = pkey_index; + attr->qkey = (qp->qp_num == 0) ? 
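+		/*
+		 * The receive path above follows the standard posting
+		 * discipline: DMA-map the buffer, link it on the software
+		 * recv_queue *before* calling ib_post_recv(), and unwind
+		 * both steps if the verb fails - the completion handler
+		 * may fire before ib_post_recv() even returns.  Skeleton:
+		 *
+		 *	sge.addr = ib_dma_map_single(dev, buf, len,
+		 *				     DMA_FROM_DEVICE);
+		 *	if (ib_dma_mapping_error(dev, sge.addr))
+		 *		return -ENOMEM;
+		 *	list_add_tail(&ent->list, &queue->list);
+		 *	if (ib_post_recv(qp, &wr, &bad_wr)) {
+		 *		list_del(&ent->list);
+		 *		ib_dma_unmap_single(dev, sge.addr, len,
+		 *				    DMA_FROM_DEVICE);
+		 *	}
+		 */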
0 : IB_QP1_QKEY; + ret = ib_modify_qp(qp, attr, IB_QP_STATE | + IB_QP_PKEY_INDEX | IB_QP_QKEY); + if (ret) { + dev_err(&port_priv->device->dev, + "Couldn't change QP%d state to INIT: %d\n", + i, ret); + goto out; + } + + attr->qp_state = IB_QPS_RTR; + ret = ib_modify_qp(qp, attr, IB_QP_STATE); + if (ret) { + dev_err(&port_priv->device->dev, + "Couldn't change QP%d state to RTR: %d\n", + i, ret); + goto out; + } + + attr->qp_state = IB_QPS_RTS; + attr->sq_psn = IB_MAD_SEND_Q_PSN; + ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) { + dev_err(&port_priv->device->dev, + "Couldn't change QP%d state to RTS: %d\n", + i, ret); + goto out; + } + } + + ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); + if (ret) { + dev_err(&port_priv->device->dev, + "Failed to request completion notification: %d\n", + ret); + goto out; + } + + for (i = 0; i < IB_MAD_QPS_CORE; i++) { + if (!port_priv->qp_info[i].qp) + continue; + + ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL); + if (ret) { + dev_err(&port_priv->device->dev, + "Couldn't post receive WRs\n"); + goto out; + } + } +out: + kfree(attr); + return ret; +} + +static void qp_event_handler(struct ib_event *event, void *qp_context) +{ + struct ib_mad_qp_info *qp_info = qp_context; + + /* It's worse than that! He's dead, Jim! */ + dev_err(&qp_info->port_priv->device->dev, + "Fatal error (%d) on MAD QP (%d)\n", + event->event, qp_info->qp->qp_num); +} + +static void init_mad_queue(struct ib_mad_qp_info *qp_info, + struct ib_mad_queue *mad_queue) +{ + mad_queue->qp_info = qp_info; + mad_queue->count = 0; + spin_lock_init(&mad_queue->lock); + INIT_LIST_HEAD(&mad_queue->list); +} + +static void init_mad_qp(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info) +{ + qp_info->port_priv = port_priv; + init_mad_queue(qp_info, &qp_info->send_queue); + init_mad_queue(qp_info, &qp_info->recv_queue); + INIT_LIST_HEAD(&qp_info->overflow_list); + spin_lock_init(&qp_info->snoop_lock); + qp_info->snoop_table = NULL; + qp_info->snoop_table_size = 0; + atomic_set(&qp_info->snoop_count, 0); +} + +static int create_mad_qp(struct ib_mad_qp_info *qp_info, + enum ib_qp_type qp_type) +{ + struct ib_qp_init_attr qp_init_attr; + int ret; + + memset(&qp_init_attr, 0, sizeof qp_init_attr); + qp_init_attr.send_cq = qp_info->port_priv->cq; + qp_init_attr.recv_cq = qp_info->port_priv->cq; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.cap.max_send_wr = mad_sendq_size; + qp_init_attr.cap.max_recv_wr = mad_recvq_size; + qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG; + qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG; + qp_init_attr.qp_type = qp_type; + qp_init_attr.port_num = qp_info->port_priv->port_num; + qp_init_attr.qp_context = qp_info; + qp_init_attr.event_handler = qp_event_handler; + qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr); + if (IS_ERR(qp_info->qp)) { + dev_err(&qp_info->port_priv->device->dev, + "Couldn't create ib_mad QP%d\n", + get_spl_qp_index(qp_type)); + ret = PTR_ERR(qp_info->qp); + goto error; + } + /* Use minimum queue sizes unless the CQ is resized */ + qp_info->send_queue.max_active = mad_sendq_size; + qp_info->recv_queue.max_active = mad_recvq_size; + return 0; + +error: + return ret; +} + +static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) +{ + if (!qp_info->qp) + return; + + ib_destroy_qp(qp_info->qp); + kfree(qp_info->snoop_table); +} + +/* + * Open the port + * Create the QP, PD, MR, and CQ if needed + */ +static int ib_mad_port_open(struct 
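+/*
+ * ib_mad_port_start() above walks QP0/QP1 through the standard
+ * RESET -> INIT -> RTR -> RTS ladder.  Special QPs need far fewer
+ * attributes than connected QPs: a pkey index and qkey at INIT
+ * (IB_QP1_QKEY for QP1, 0 for QP0), nothing extra at RTR, and only
+ * the send PSN at RTS.  Boiled down, the ladder is:
+ *
+ *	attr.qp_state = IB_QPS_INIT;
+ *	attr.pkey_index = pkey_index;
+ *	attr.qkey = qp->qp_num ? IB_QP1_QKEY : 0;
+ *	ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY);
+ *	attr.qp_state = IB_QPS_RTR;
+ *	ib_modify_qp(qp, &attr, IB_QP_STATE);
+ *	attr.qp_state = IB_QPS_RTS;
+ *	attr.sq_psn = IB_MAD_SEND_Q_PSN;
+ *	ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
+ */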
ib_device *device, + int port_num) +{ + int ret, cq_size; + struct ib_mad_port_private *port_priv; + unsigned long flags; + char name[sizeof "ib_mad123"]; + int has_smi; + + if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) + return -EFAULT; + + if (WARN_ON(rdma_cap_opa_mad(device, port_num) && + rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE)) + return -EFAULT; + + /* Create new device info */ + port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); + if (!port_priv) { + dev_err(&device->dev, "No memory for ib_mad_port_private\n"); + return -ENOMEM; + } + + port_priv->device = device; + port_priv->port_num = port_num; + spin_lock_init(&port_priv->reg_lock); + INIT_LIST_HEAD(&port_priv->agent_list); + init_mad_qp(port_priv, &port_priv->qp_info[0]); + init_mad_qp(port_priv, &port_priv->qp_info[1]); + + cq_size = mad_sendq_size + mad_recvq_size; + has_smi = rdma_cap_ib_smi(device, port_num); + if (has_smi) + cq_size *= 2; + + port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, + IB_POLL_WORKQUEUE); + if (IS_ERR(port_priv->cq)) { + dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); + ret = PTR_ERR(port_priv->cq); + goto error3; + } + + port_priv->pd = ib_alloc_pd(device, 0); + if (IS_ERR(port_priv->pd)) { + dev_err(&device->dev, "Couldn't create ib_mad PD\n"); + ret = PTR_ERR(port_priv->pd); + goto error4; + } + + if (has_smi) { + ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); + if (ret) + goto error6; + } + ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); + if (ret) + goto error7; + + snprintf(name, sizeof name, "ib_mad%d", port_num); + port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); + if (!port_priv->wq) { + ret = -ENOMEM; + goto error8; + } + + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + list_add_tail(&port_priv->port_list, &ib_mad_port_list); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + + ret = ib_mad_port_start(port_priv); + if (ret) { + dev_err(&device->dev, "Couldn't start port\n"); + goto error9; + } + + return 0; + +error9: + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + list_del_init(&port_priv->port_list); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + + destroy_workqueue(port_priv->wq); +error8: + destroy_mad_qp(&port_priv->qp_info[1]); +error7: + destroy_mad_qp(&port_priv->qp_info[0]); +error6: + ib_dealloc_pd(port_priv->pd); +error4: + ib_free_cq(port_priv->cq); + cleanup_recv_queue(&port_priv->qp_info[1]); + cleanup_recv_queue(&port_priv->qp_info[0]); +error3: + kfree(port_priv); + + return ret; +} + +/* + * Close the port + * If there are no classes using the port, free the port + * resources (CQ, MR, PD, QP) and remove the port's info structure + */ +static int ib_mad_port_close(struct ib_device *device, int port_num) +{ + struct ib_mad_port_private *port_priv; + unsigned long flags; + + spin_lock_irqsave(&ib_mad_port_list_lock, flags); + port_priv = __ib_get_mad_port(device, port_num); + if (port_priv == NULL) { + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + dev_err(&device->dev, "Port %d not found\n", port_num); + return -ENODEV; + } + list_del_init(&port_priv->port_list); + spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); + + destroy_workqueue(port_priv->wq); + destroy_mad_qp(&port_priv->qp_info[1]); + destroy_mad_qp(&port_priv->qp_info[0]); + ib_dealloc_pd(port_priv->pd); + ib_free_cq(port_priv->cq); + cleanup_recv_queue(&port_priv->qp_info[1]); + cleanup_recv_queue(&port_priv->qp_info[0]); + /* XXX: Handle deallocation of MAD 
registration tables */ + + kfree(port_priv); + + return 0; +} + +static void ib_mad_init_device(struct ib_device *device) +{ + int start, i; + + start = rdma_start_port(device); + + for (i = start; i <= rdma_end_port(device); i++) { + if (!rdma_cap_ib_mad(device, i)) + continue; + + if (ib_mad_port_open(device, i)) { + dev_err(&device->dev, "Couldn't open port %d\n", i); + goto error; + } + if (ib_agent_port_open(device, i)) { + dev_err(&device->dev, + "Couldn't open port %d for agents\n", i); + goto error_agent; + } + } + return; + +error_agent: + if (ib_mad_port_close(device, i)) + dev_err(&device->dev, "Couldn't close port %d\n", i); + +error: + while (--i >= start) { + if (!rdma_cap_ib_mad(device, i)) + continue; + + if (ib_agent_port_close(device, i)) + dev_err(&device->dev, + "Couldn't close port %d for agents\n", i); + if (ib_mad_port_close(device, i)) + dev_err(&device->dev, "Couldn't close port %d\n", i); + } +} + +static void ib_mad_remove_device(struct ib_device *device, void *client_data) +{ + int i; + + for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { + if (!rdma_cap_ib_mad(device, i)) + continue; + + if (ib_agent_port_close(device, i)) + dev_err(&device->dev, + "Couldn't close port %d for agents\n", i); + if (ib_mad_port_close(device, i)) + dev_err(&device->dev, "Couldn't close port %d\n", i); + } +} + +static struct ib_client mad_client = { + .name = "mad", + .add = ib_mad_init_device, + .remove = ib_mad_remove_device +}; + +int ib_mad_init(void) +{ + mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); + mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); + + mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE); + mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE); + + INIT_LIST_HEAD(&ib_mad_port_list); + + if (ib_register_client(&mad_client)) { + pr_err("Couldn't register ib_mad client\n"); + return -EINVAL; + } + + return 0; +} + +void ib_mad_cleanup(void) +{ + ib_unregister_client(&mad_client); +} Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_mad.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c (revision 320592) @@ -0,0 +1,968 @@ +/* + * Copyright (c) 2005 Intel Inc. All rights reserved. + * Copyright (c) 2005-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "mad_priv.h" +#include "mad_rmpp.h" + +enum rmpp_state { + RMPP_STATE_ACTIVE, + RMPP_STATE_TIMEOUT, + RMPP_STATE_COMPLETE, + RMPP_STATE_CANCELING +}; + +struct mad_rmpp_recv { + struct ib_mad_agent_private *agent; + struct list_head list; + struct delayed_work timeout_work; + struct delayed_work cleanup_work; + struct completion comp; + enum rmpp_state state; + spinlock_t lock; + atomic_t refcount; + + struct ib_ah *ah; + struct ib_mad_recv_wc *rmpp_wc; + struct ib_mad_recv_buf *cur_seg_buf; + int last_ack; + int seg_num; + int newwin; + int repwin; + + __be64 tid; + u32 src_qp; + u16 slid; + u8 mgmt_class; + u8 class_version; + u8 method; + u8 base_version; +}; + +static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) +{ + if (atomic_dec_and_test(&rmpp_recv->refcount)) + complete(&rmpp_recv->comp); +} + +static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) +{ + deref_rmpp_recv(rmpp_recv); + wait_for_completion(&rmpp_recv->comp); + ib_destroy_ah(rmpp_recv->ah); + kfree(rmpp_recv); +} + +void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) +{ + struct mad_rmpp_recv *rmpp_recv, *temp_rmpp_recv; + unsigned long flags; + + spin_lock_irqsave(&agent->lock, flags); + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + if (rmpp_recv->state != RMPP_STATE_COMPLETE) + ib_free_recv_mad(rmpp_recv->rmpp_wc); + rmpp_recv->state = RMPP_STATE_CANCELING; + } + spin_unlock_irqrestore(&agent->lock, flags); + + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + cancel_delayed_work(&rmpp_recv->timeout_work); + cancel_delayed_work(&rmpp_recv->cleanup_work); + } + + flush_workqueue(agent->qp_info->port_priv->wq); + + list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv, + &agent->rmpp_list, list) { + list_del(&rmpp_recv->list); + destroy_rmpp_recv(rmpp_recv); + } +} + +static void format_ack(struct ib_mad_send_buf *msg, + struct ib_rmpp_mad *data, + struct mad_rmpp_recv *rmpp_recv) +{ + struct ib_rmpp_mad *ack = msg->mad; + unsigned long flags; + + memcpy(ack, &data->mad_hdr, msg->hdr_len); + + ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP; + ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK; + ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + + spin_lock_irqsave(&rmpp_recv->lock, flags); + rmpp_recv->last_ack = rmpp_recv->seg_num; + ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num); + ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin); + spin_unlock_irqrestore(&rmpp_recv->lock, flags); +} + +static void ack_recv(struct mad_rmpp_recv *rmpp_recv, + struct ib_mad_recv_wc *recv_wc) +{ + struct ib_mad_send_buf *msg; + int ret, hdr_len; + + hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); + msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, + 
recv_wc->wc->pkey_index, 1, hdr_len, + 0, GFP_KERNEL, + IB_MGMT_BASE_VERSION); + if (IS_ERR(msg)) + return; + + format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); + msg->ah = rmpp_recv->ah; + ret = ib_post_send_mad(msg, NULL); + if (ret) + ib_free_send_mad(msg); +} + +static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, + struct ib_mad_recv_wc *recv_wc) +{ + struct ib_mad_send_buf *msg; + struct ib_ah *ah; + int hdr_len; + + ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, + recv_wc->recv_buf.grh, agent->port_num); + if (IS_ERR(ah)) + return (void *) ah; + + hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); + msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, + recv_wc->wc->pkey_index, 1, + hdr_len, 0, GFP_KERNEL, + IB_MGMT_BASE_VERSION); + if (IS_ERR(msg)) + ib_destroy_ah(ah); + else { + msg->ah = ah; + msg->context[0] = ah; + } + + return msg; +} + +static void ack_ds_ack(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *recv_wc) +{ + struct ib_mad_send_buf *msg; + struct ib_rmpp_mad *rmpp_mad; + int ret; + + msg = alloc_response_msg(&agent->agent, recv_wc); + if (IS_ERR(msg)) + return; + + rmpp_mad = msg->mad; + memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len); + + rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + rmpp_mad->rmpp_hdr.seg_num = 0; + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1); + + ret = ib_post_send_mad(msg, NULL); + if (ret) { + ib_destroy_ah(msg->ah); + ib_free_send_mad(msg); + } +} + +void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) +{ + if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah) + ib_destroy_ah(mad_send_wc->send_buf->ah); + ib_free_send_mad(mad_send_wc->send_buf); +} + +static void nack_recv(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *recv_wc, u8 rmpp_status) +{ + struct ib_mad_send_buf *msg; + struct ib_rmpp_mad *rmpp_mad; + int ret; + + msg = alloc_response_msg(&agent->agent, recv_wc); + if (IS_ERR(msg)) + return; + + rmpp_mad = msg->mad; + memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len); + + rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; + rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION; + rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ABORT; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + rmpp_mad->rmpp_hdr.rmpp_status = rmpp_status; + rmpp_mad->rmpp_hdr.seg_num = 0; + rmpp_mad->rmpp_hdr.paylen_newwin = 0; + + ret = ib_post_send_mad(msg, NULL); + if (ret) { + ib_destroy_ah(msg->ah); + ib_free_send_mad(msg); + } +} + +static void recv_timeout_handler(struct work_struct *work) +{ + struct mad_rmpp_recv *rmpp_recv = + container_of(work, struct mad_rmpp_recv, timeout_work.work); + struct ib_mad_recv_wc *rmpp_wc; + unsigned long flags; + + spin_lock_irqsave(&rmpp_recv->agent->lock, flags); + if (rmpp_recv->state != RMPP_STATE_ACTIVE) { + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + return; + } + rmpp_recv->state = RMPP_STATE_TIMEOUT; + list_del(&rmpp_recv->list); + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + + rmpp_wc = rmpp_recv->rmpp_wc; + nack_recv(rmpp_recv->agent, rmpp_wc, IB_MGMT_RMPP_STATUS_T2L); + destroy_rmpp_recv(rmpp_recv); + ib_free_recv_mad(rmpp_wc); +} + +static void recv_cleanup_handler(struct work_struct *work) +{ + struct mad_rmpp_recv *rmpp_recv = + container_of(work, struct mad_rmpp_recv, cleanup_work.work); + unsigned long flags; + + 
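+	/*
+	 * All the control MADs above (ACK, DS ACK, ABORT/NACK) are built
+	 * the same way: a one-segment send buffer whose header is copied
+	 * from the MAD being answered and whose method gets the response
+	 * bit toggled.  The shared skeleton, with hdr_len derived from
+	 * the management class:
+	 *
+	 *	msg = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index,
+	 *				 1, hdr_len, 0, GFP_KERNEL,
+	 *				 IB_MGMT_BASE_VERSION);
+	 *	memcpy(msg->mad, orig, msg->hdr_len);
+	 *	((struct ib_mad_hdr *)msg->mad)->method ^= IB_MGMT_METHOD_RESP;
+	 *
+	 * The rmpp_active argument of 1 sizes the buffer with an RMPP
+	 * header so the rmpp_hdr fields can then be filled in directly.
+	 */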
spin_lock_irqsave(&rmpp_recv->agent->lock, flags); + if (rmpp_recv->state == RMPP_STATE_CANCELING) { + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + return; + } + list_del(&rmpp_recv->list); + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + destroy_rmpp_recv(rmpp_recv); +} + +static struct mad_rmpp_recv * +create_rmpp_recv(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + struct ib_mad_hdr *mad_hdr; + + rmpp_recv = kmalloc(sizeof *rmpp_recv, GFP_KERNEL); + if (!rmpp_recv) + return NULL; + + rmpp_recv->ah = ib_create_ah_from_wc(agent->agent.qp->pd, + mad_recv_wc->wc, + mad_recv_wc->recv_buf.grh, + agent->agent.port_num); + if (IS_ERR(rmpp_recv->ah)) + goto error; + + rmpp_recv->agent = agent; + init_completion(&rmpp_recv->comp); + INIT_DELAYED_WORK(&rmpp_recv->timeout_work, recv_timeout_handler); + INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler); + spin_lock_init(&rmpp_recv->lock); + rmpp_recv->state = RMPP_STATE_ACTIVE; + atomic_set(&rmpp_recv->refcount, 1); + + rmpp_recv->rmpp_wc = mad_recv_wc; + rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf; + rmpp_recv->newwin = 1; + rmpp_recv->seg_num = 1; + rmpp_recv->last_ack = 0; + rmpp_recv->repwin = 1; + + mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; + rmpp_recv->tid = mad_hdr->tid; + rmpp_recv->src_qp = mad_recv_wc->wc->src_qp; + rmpp_recv->slid = mad_recv_wc->wc->slid; + rmpp_recv->mgmt_class = mad_hdr->mgmt_class; + rmpp_recv->class_version = mad_hdr->class_version; + rmpp_recv->method = mad_hdr->method; + rmpp_recv->base_version = mad_hdr->base_version; + return rmpp_recv; + +error: kfree(rmpp_recv); + return NULL; +} + +static struct mad_rmpp_recv * +find_rmpp_recv(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + struct ib_mad_hdr *mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; + + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + if (rmpp_recv->tid == mad_hdr->tid && + rmpp_recv->src_qp == mad_recv_wc->wc->src_qp && + rmpp_recv->slid == mad_recv_wc->wc->slid && + rmpp_recv->mgmt_class == mad_hdr->mgmt_class && + rmpp_recv->class_version == mad_hdr->class_version && + rmpp_recv->method == mad_hdr->method) + return rmpp_recv; + } + return NULL; +} + +static struct mad_rmpp_recv * +acquire_rmpp_recv(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + unsigned long flags; + + spin_lock_irqsave(&agent->lock, flags); + rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); + if (rmpp_recv) + atomic_inc(&rmpp_recv->refcount); + spin_unlock_irqrestore(&agent->lock, flags); + return rmpp_recv; +} + +static struct mad_rmpp_recv * +insert_rmpp_recv(struct ib_mad_agent_private *agent, + struct mad_rmpp_recv *rmpp_recv) +{ + struct mad_rmpp_recv *cur_rmpp_recv; + + cur_rmpp_recv = find_rmpp_recv(agent, rmpp_recv->rmpp_wc); + if (!cur_rmpp_recv) + list_add_tail(&rmpp_recv->list, &agent->rmpp_list); + + return cur_rmpp_recv; +} + +static inline int get_last_flag(struct ib_mad_recv_buf *seg) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *) seg->mad; + return ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_LAST; +} + +static inline int get_seg_num(struct ib_mad_recv_buf *seg) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *) seg->mad; + return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num); +} + +static inline struct ib_mad_recv_buf * get_next_seg(struct list_head 
*rmpp_list, + struct ib_mad_recv_buf *seg) +{ + if (seg->list.next == rmpp_list) + return NULL; + + return container_of(seg->list.next, struct ib_mad_recv_buf, list); +} + +static inline int window_size(struct ib_mad_agent_private *agent) +{ + return max(agent->qp_info->recv_queue.max_active >> 3, 1); +} + +static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list, + int seg_num) +{ + struct ib_mad_recv_buf *seg_buf; + int cur_seg_num; + + list_for_each_entry_reverse(seg_buf, rmpp_list, list) { + cur_seg_num = get_seg_num(seg_buf); + if (seg_num > cur_seg_num) + return seg_buf; + if (seg_num == cur_seg_num) + break; + } + return NULL; +} + +static void update_seg_num(struct mad_rmpp_recv *rmpp_recv, + struct ib_mad_recv_buf *new_buf) +{ + struct list_head *rmpp_list = &rmpp_recv->rmpp_wc->rmpp_list; + + while (new_buf && (get_seg_num(new_buf) == rmpp_recv->seg_num + 1)) { + rmpp_recv->cur_seg_buf = new_buf; + rmpp_recv->seg_num++; + new_buf = get_next_seg(rmpp_list, new_buf); + } +} + +static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv) +{ + struct ib_rmpp_mad *rmpp_mad; + int hdr_size, data_size, pad; + bool opa = rdma_cap_opa_mad(rmpp_recv->agent->qp_info->port_priv->device, + rmpp_recv->agent->qp_info->port_priv->port_num); + + rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad; + + hdr_size = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); + if (opa && rmpp_recv->base_version == OPA_MGMT_BASE_VERSION) { + data_size = sizeof(struct opa_rmpp_mad) - hdr_size; + pad = OPA_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (pad > OPA_MGMT_RMPP_DATA || pad < 0) + pad = 0; + } else { + data_size = sizeof(struct ib_rmpp_mad) - hdr_size; + pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (pad > IB_MGMT_RMPP_DATA || pad < 0) + pad = 0; + } + + return hdr_size + rmpp_recv->seg_num * data_size - pad; +} + +static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv) +{ + struct ib_mad_recv_wc *rmpp_wc; + + ack_recv(rmpp_recv, rmpp_recv->rmpp_wc); + if (rmpp_recv->seg_num > 1) + cancel_delayed_work(&rmpp_recv->timeout_work); + + rmpp_wc = rmpp_recv->rmpp_wc; + rmpp_wc->mad_len = get_mad_len(rmpp_recv); + /* 10 seconds until we can find the packet lifetime */ + queue_delayed_work(rmpp_recv->agent->qp_info->port_priv->wq, + &rmpp_recv->cleanup_work, msecs_to_jiffies(10000)); + return rmpp_wc; +} + +static struct ib_mad_recv_wc * +continue_rmpp(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + struct ib_mad_recv_buf *prev_buf; + struct ib_mad_recv_wc *done_wc; + int seg_num; + unsigned long flags; + + rmpp_recv = acquire_rmpp_recv(agent, mad_recv_wc); + if (!rmpp_recv) + goto drop1; + + seg_num = get_seg_num(&mad_recv_wc->recv_buf); + + spin_lock_irqsave(&rmpp_recv->lock, flags); + if ((rmpp_recv->state == RMPP_STATE_TIMEOUT) || + (seg_num > rmpp_recv->newwin)) + goto drop3; + + if ((seg_num <= rmpp_recv->last_ack) || + (rmpp_recv->state == RMPP_STATE_COMPLETE)) { + spin_unlock_irqrestore(&rmpp_recv->lock, flags); + ack_recv(rmpp_recv, mad_recv_wc); + goto drop2; + } + + prev_buf = find_seg_location(&rmpp_recv->rmpp_wc->rmpp_list, seg_num); + if (!prev_buf) + goto drop3; + + done_wc = NULL; + list_add(&mad_recv_wc->recv_buf.list, &prev_buf->list); + if (rmpp_recv->cur_seg_buf == prev_buf) { + update_seg_num(rmpp_recv, &mad_recv_wc->recv_buf); + if (get_last_flag(rmpp_recv->cur_seg_buf)) { + rmpp_recv->state = RMPP_STATE_COMPLETE; 
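+	/*
+	 * Length recovery in get_mad_len() above: the pad (unused bytes
+	 * of the final segment) is derived from the last segment's
+	 * paylen_newwin, clamped to the valid range, and the total is
+	 *
+	 *	mad_len = hdr_size + seg_num * data_size - pad;
+	 *
+	 * e.g. with hypothetical numbers - a 56-byte class header
+	 * (data_size 200), three received segments, recovered pad 120:
+	 * 56 + 3 * 200 - 120 = 536 bytes.
+	 */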
+ spin_unlock_irqrestore(&rmpp_recv->lock, flags); + done_wc = complete_rmpp(rmpp_recv); + goto out; + } else if (rmpp_recv->seg_num == rmpp_recv->newwin) { + rmpp_recv->newwin += window_size(agent); + spin_unlock_irqrestore(&rmpp_recv->lock, flags); + ack_recv(rmpp_recv, mad_recv_wc); + goto out; + } + } + spin_unlock_irqrestore(&rmpp_recv->lock, flags); +out: + deref_rmpp_recv(rmpp_recv); + return done_wc; + +drop3: spin_unlock_irqrestore(&rmpp_recv->lock, flags); +drop2: deref_rmpp_recv(rmpp_recv); +drop1: ib_free_recv_mad(mad_recv_wc); + return NULL; +} + +static struct ib_mad_recv_wc * +start_rmpp(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct mad_rmpp_recv *rmpp_recv; + unsigned long flags; + + rmpp_recv = create_rmpp_recv(agent, mad_recv_wc); + if (!rmpp_recv) { + ib_free_recv_mad(mad_recv_wc); + return NULL; + } + + spin_lock_irqsave(&agent->lock, flags); + if (insert_rmpp_recv(agent, rmpp_recv)) { + spin_unlock_irqrestore(&agent->lock, flags); + /* duplicate first MAD */ + destroy_rmpp_recv(rmpp_recv); + return continue_rmpp(agent, mad_recv_wc); + } + atomic_inc(&rmpp_recv->refcount); + + if (get_last_flag(&mad_recv_wc->recv_buf)) { + rmpp_recv->state = RMPP_STATE_COMPLETE; + spin_unlock_irqrestore(&agent->lock, flags); + complete_rmpp(rmpp_recv); + } else { + spin_unlock_irqrestore(&agent->lock, flags); + /* 40 seconds until we can find the packet lifetimes */ + queue_delayed_work(agent->qp_info->port_priv->wq, + &rmpp_recv->timeout_work, + msecs_to_jiffies(40000)); + rmpp_recv->newwin += window_size(agent); + ack_recv(rmpp_recv, mad_recv_wc); + mad_recv_wc = NULL; + } + deref_rmpp_recv(rmpp_recv); + return mad_recv_wc; +} + +static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_mad *rmpp_mad; + int timeout; + u32 paylen = 0; + + rmpp_mad = mad_send_wr->send_buf.mad; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(++mad_send_wr->seg_num); + + if (mad_send_wr->seg_num == 1) { + rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; + paylen = (mad_send_wr->send_buf.seg_count * + mad_send_wr->send_buf.seg_rmpp_size) - + mad_send_wr->pad; + } + + if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) { + rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; + paylen = mad_send_wr->send_buf.seg_rmpp_size - mad_send_wr->pad; + } + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); + + /* 2 seconds for an ACK until we can find the packet lifetime */ + timeout = mad_send_wr->send_buf.timeout_ms; + if (!timeout || timeout > 2000) + mad_send_wr->timeout = msecs_to_jiffies(2000); + + return ib_send_mad(mad_send_wr); +} + +static void abort_send(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc, u8 rmpp_status) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_mad_send_wc wc; + unsigned long flags; + + spin_lock_irqsave(&agent->lock, flags); + mad_send_wr = ib_find_send_mad(agent, mad_recv_wc); + if (!mad_send_wr) + goto out; /* Unmatched send */ + + if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || + (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) + goto out; /* Send is already done */ + + ib_mark_mad_done(mad_send_wr); + spin_unlock_irqrestore(&agent->lock, flags); + + wc.status = IB_WC_REM_ABORT_ERR; + wc.vendor_err = rmpp_status; + wc.send_buf = &mad_send_wr->send_buf; + ib_mad_complete_send_wr(mad_send_wr, &wc); + return; +out: + 
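+	/*
+	 * Segmentation rule used by send_next_seg() above: paylen_newwin
+	 * doubles as the payload-length field on DATA packets, and only
+	 * the first and last segments advertise it.  The first carries
+	 * the total payload (seg_count * seg_rmpp_size - pad), the last
+	 * the bytes remaining in that final segment (seg_rmpp_size - pad);
+	 * every other DATA segment sends 0.  The 2-second cap on the ACK
+	 * wait is a stand-in until the real packet lifetime is known.
+	 */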
spin_unlock_irqrestore(&agent->lock, flags); +} + +static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr, + int seg_num) +{ + struct list_head *list; + + wr->last_ack = seg_num; + list = &wr->last_ack_seg->list; + list_for_each_entry(wr->last_ack_seg, list, list) + if (wr->last_ack_seg->num == seg_num) + break; +} + +static void process_ds_ack(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc, int newwin) +{ + struct mad_rmpp_recv *rmpp_recv; + + rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); + if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE) + rmpp_recv->repwin = newwin; +} + +static void process_rmpp_ack(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_send_wr_private *mad_send_wr; + struct ib_rmpp_mad *rmpp_mad; + unsigned long flags; + int seg_num, newwin, ret; + + rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; + if (rmpp_mad->rmpp_hdr.rmpp_status) { + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); + return; + } + + seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num); + newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (newwin < seg_num) { + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S); + return; + } + + spin_lock_irqsave(&agent->lock, flags); + mad_send_wr = ib_find_send_mad(agent, mad_recv_wc); + if (!mad_send_wr) { + if (!seg_num) + process_ds_ack(agent, mad_recv_wc, newwin); + goto out; /* Unmatched or DS RMPP ACK */ + } + + if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) && + (mad_send_wr->timeout)) { + spin_unlock_irqrestore(&agent->lock, flags); + ack_ds_ack(agent, mad_recv_wc); + return; /* Repeated ACK for DS RMPP transaction */ + } + + if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || + (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) + goto out; /* Send is already done */ + + if (seg_num > mad_send_wr->send_buf.seg_count || + seg_num > mad_send_wr->newwin) { + spin_unlock_irqrestore(&agent->lock, flags); + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B); + return; + } + + if (newwin < mad_send_wr->newwin || seg_num < mad_send_wr->last_ack) + goto out; /* Old ACK */ + + if (seg_num > mad_send_wr->last_ack) { + adjust_last_ack(mad_send_wr, seg_num); + mad_send_wr->retries_left = mad_send_wr->max_retries; + } + mad_send_wr->newwin = newwin; + if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { + /* If no response is expected, the ACK completes the send */ + if (!mad_send_wr->send_buf.timeout_ms) { + struct ib_mad_send_wc wc; + + ib_mark_mad_done(mad_send_wr); + spin_unlock_irqrestore(&agent->lock, flags); + + wc.status = IB_WC_SUCCESS; + wc.vendor_err = 0; + wc.send_buf = &mad_send_wr->send_buf; + ib_mad_complete_send_wr(mad_send_wr, &wc); + return; + } + if (mad_send_wr->refcount == 1) + ib_reset_mad_timeout(mad_send_wr, + mad_send_wr->send_buf.timeout_ms); + spin_unlock_irqrestore(&agent->lock, flags); + ack_ds_ack(agent, mad_recv_wc); + return; + } else if (mad_send_wr->refcount == 1 && + mad_send_wr->seg_num < mad_send_wr->newwin && + mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) { + /* Send failure will just result in a timeout/retry */ + ret = send_next_seg(mad_send_wr); + if (ret) + goto out; + + mad_send_wr->refcount++; + list_move_tail(&mad_send_wr->agent_list, + 
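+	/*
+	 * Sender-side window rule enforced above: in an ACK,
+	 * paylen_newwin is the highest segment the receiver will
+	 * currently accept and seg_num the highest it has reassembled,
+	 * so transmission may continue while
+	 *
+	 *	seg_num < newwin && seg_num < send_buf.seg_count
+	 *
+	 * An ACK for a segment beyond seg_count or beyond the advertised
+	 * window aborts the transaction (S2B); one that moves the window
+	 * backwards is ignored as old; and one that advances last_ack
+	 * also refreshes retries_left to max_retries.
+	 */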
&mad_send_wr->mad_agent_priv->send_list); + } +out: + spin_unlock_irqrestore(&agent->lock, flags); +} + +static struct ib_mad_recv_wc * +process_rmpp_data(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_rmpp_hdr *rmpp_hdr; + u8 rmpp_status; + + rmpp_hdr = &((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr; + + if (rmpp_hdr->rmpp_status) { + rmpp_status = IB_MGMT_RMPP_STATUS_BAD_STATUS; + goto bad; + } + + if (rmpp_hdr->seg_num == cpu_to_be32(1)) { + if (!(ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST)) { + rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG; + goto bad; + } + return start_rmpp(agent, mad_recv_wc); + } else { + if (ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST) { + rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG; + goto bad; + } + return continue_rmpp(agent, mad_recv_wc); + } +bad: + nack_recv(agent, mad_recv_wc, rmpp_status); + ib_free_recv_mad(mad_recv_wc); + return NULL; +} + +static void process_rmpp_stop(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; + + if (rmpp_mad->rmpp_hdr.rmpp_status != IB_MGMT_RMPP_STATUS_RESX) { + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); + } else + abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status); +} + +static void process_rmpp_abort(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; + + if (rmpp_mad->rmpp_hdr.rmpp_status < IB_MGMT_RMPP_STATUS_ABORT_MIN || + rmpp_mad->rmpp_hdr.rmpp_status > IB_MGMT_RMPP_STATUS_ABORT_MAX) { + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); + } else + abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status); +} + +struct ib_mad_recv_wc * +ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_rmpp_mad *rmpp_mad; + + rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; + if (!(rmpp_mad->rmpp_hdr.rmpp_rtime_flags & IB_MGMT_RMPP_FLAG_ACTIVE)) + return mad_recv_wc; + + if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) { + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV); + goto out; + } + + switch (rmpp_mad->rmpp_hdr.rmpp_type) { + case IB_MGMT_RMPP_TYPE_DATA: + return process_rmpp_data(agent, mad_recv_wc); + case IB_MGMT_RMPP_TYPE_ACK: + process_rmpp_ack(agent, mad_recv_wc); + break; + case IB_MGMT_RMPP_TYPE_STOP: + process_rmpp_stop(agent, mad_recv_wc); + break; + case IB_MGMT_RMPP_TYPE_ABORT: + process_rmpp_abort(agent, mad_recv_wc); + break; + default: + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT); + nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT); + break; + } +out: + ib_free_recv_mad(mad_recv_wc); + return NULL; +} + +static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv; + struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad; + struct mad_rmpp_recv *rmpp_recv; + struct ib_ah_attr ah_attr; + unsigned long flags; + int newwin = 1; + + if (!(mad_hdr->method & IB_MGMT_METHOD_RESP)) + goto out; + + spin_lock_irqsave(&agent->lock, flags); + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) 
{ + if (rmpp_recv->tid != mad_hdr->tid || + rmpp_recv->mgmt_class != mad_hdr->mgmt_class || + rmpp_recv->class_version != mad_hdr->class_version || + (rmpp_recv->method & IB_MGMT_METHOD_RESP)) + continue; + + if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr)) + continue; + + if (rmpp_recv->slid == ah_attr.dlid) { + newwin = rmpp_recv->repwin; + break; + } + } + spin_unlock_irqrestore(&agent->lock, flags); +out: + return newwin; +} + +int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_mad *rmpp_mad; + int ret; + + rmpp_mad = mad_send_wr->send_buf.mad; + if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE)) + return IB_RMPP_RESULT_UNHANDLED; + + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { + mad_send_wr->seg_num = 1; + return IB_RMPP_RESULT_INTERNAL; + } + + mad_send_wr->newwin = init_newwin(mad_send_wr); + + /* We need to wait for the final ACK even if there isn't a response */ + mad_send_wr->refcount += (mad_send_wr->timeout == 0); + ret = send_next_seg(mad_send_wr); + if (!ret) + return IB_RMPP_RESULT_CONSUMED; + return ret; +} + +int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_send_wc *mad_send_wc) +{ + struct ib_rmpp_mad *rmpp_mad; + int ret; + + rmpp_mad = mad_send_wr->send_buf.mad; + if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE)) + return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ + + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) + return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */ + + if (mad_send_wc->status != IB_WC_SUCCESS || + mad_send_wr->status != IB_WC_SUCCESS) + return IB_RMPP_RESULT_PROCESSED; /* Canceled or send error */ + + if (!mad_send_wr->timeout) + return IB_RMPP_RESULT_PROCESSED; /* Response received */ + + if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { + mad_send_wr->timeout = + msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); + return IB_RMPP_RESULT_PROCESSED; /* Send done */ + } + + if (mad_send_wr->seg_num == mad_send_wr->newwin || + mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) + return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */ + + ret = send_next_seg(mad_send_wr); + if (ret) { + mad_send_wc->status = IB_WC_GENERAL_ERR; + return IB_RMPP_RESULT_PROCESSED; + } + return IB_RMPP_RESULT_CONSUMED; +} + +int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_rmpp_mad *rmpp_mad; + int ret; + + rmpp_mad = mad_send_wr->send_buf.mad; + if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE)) + return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ + + if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) + return IB_RMPP_RESULT_PROCESSED; + + mad_send_wr->seg_num = mad_send_wr->last_ack; + mad_send_wr->cur_seg = mad_send_wr->last_ack_seg; + + ret = send_next_seg(mad_send_wr); + if (ret) + return IB_RMPP_RESULT_PROCESSED; + + return IB_RMPP_RESULT_CONSUMED; +} Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_multicast.c =================================================================== --- 
projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_multicast.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_multicast.c (revision 320592) @@ -0,0 +1,900 @@ +/* + * Copyright (c) 2006 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define LINUXKPI_PARAM_PREFIX ibcore_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "sa.h" + +static void mcast_add_one(struct ib_device *device); +static void mcast_remove_one(struct ib_device *device, void *client_data); + +static struct ib_client mcast_client = { + .name = "ib_multicast", + .add = mcast_add_one, + .remove = mcast_remove_one +}; + +static struct ib_sa_client sa_client; +static struct workqueue_struct *mcast_wq; +static union ib_gid mgid0; + +struct mcast_device; + +struct mcast_port { + struct mcast_device *dev; + spinlock_t lock; + struct rb_root table; + atomic_t refcount; + struct completion comp; + u8 port_num; +}; + +struct mcast_device { + struct ib_device *device; + struct ib_event_handler event_handler; + int start_port; + int end_port; + struct mcast_port port[0]; +}; + +enum mcast_state { + MCAST_JOINING, + MCAST_MEMBER, + MCAST_ERROR, +}; + +enum mcast_group_state { + MCAST_IDLE, + MCAST_BUSY, + MCAST_GROUP_ERROR, + MCAST_PKEY_EVENT +}; + +enum { + MCAST_INVALID_PKEY_INDEX = 0xFFFF +}; + +struct mcast_member; + +struct mcast_group { + struct ib_sa_mcmember_rec rec; + struct rb_node node; + struct mcast_port *port; + spinlock_t lock; + struct work_struct work; + struct list_head pending_list; + struct list_head active_list; + struct mcast_member *last_join; + int members[NUM_JOIN_MEMBERSHIP_TYPES]; + atomic_t refcount; + enum mcast_group_state state; + struct ib_sa_query *query; + u16 pkey_index; + u8 leave_state; + int retries; +}; + +struct mcast_member { + struct ib_sa_multicast multicast; + struct ib_sa_client *client; + struct mcast_group *group; + struct list_head list; + enum mcast_state state; + atomic_t refcount; + struct completion comp; +}; + +static void join_handler(int status, struct ib_sa_mcmember_rec *rec, + void *context); +static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, + void *context); + +static 
struct mcast_group *mcast_find(struct mcast_port *port,
+			       union ib_gid *mgid)
+{
+	struct rb_node *node = port->table.rb_node;
+	struct mcast_group *group;
+	int ret;
+
+	while (node) {
+		group = rb_entry(node, struct mcast_group, node);
+		ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
+		if (!ret)
+			return group;
+
+		if (ret < 0)
+			node = node->rb_left;
+		else
+			node = node->rb_right;
+	}
+	return NULL;
+}
+
+static struct mcast_group *mcast_insert(struct mcast_port *port,
+					struct mcast_group *group,
+					int allow_duplicates)
+{
+	struct rb_node **link = &port->table.rb_node;
+	struct rb_node *parent = NULL;
+	struct mcast_group *cur_group;
+	int ret;
+
+	while (*link) {
+		parent = *link;
+		cur_group = rb_entry(parent, struct mcast_group, node);
+
+		ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
+			     sizeof group->rec.mgid);
+		if (ret < 0)
+			link = &(*link)->rb_left;
+		else if (ret > 0)
+			link = &(*link)->rb_right;
+		else if (allow_duplicates)
+			link = &(*link)->rb_left;
+		else
+			return cur_group;
+	}
+	rb_link_node(&group->node, parent, link);
+	rb_insert_color(&group->node, &port->table);
+	return NULL;
+}
+
+static void deref_port(struct mcast_port *port)
+{
+	if (atomic_dec_and_test(&port->refcount))
+		complete(&port->comp);
+}
+
+static void release_group(struct mcast_group *group)
+{
+	struct mcast_port *port = group->port;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	if (atomic_dec_and_test(&group->refcount)) {
+		rb_erase(&group->node, &port->table);
+		spin_unlock_irqrestore(&port->lock, flags);
+		kfree(group);
+		deref_port(port);
+	} else
+		spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void deref_member(struct mcast_member *member)
+{
+	if (atomic_dec_and_test(&member->refcount))
+		complete(&member->comp);
+}
+
+static void queue_join(struct mcast_member *member)
+{
+	struct mcast_group *group = member->group;
+	unsigned long flags;
+
+	spin_lock_irqsave(&group->lock, flags);
+	list_add_tail(&member->list, &group->pending_list);
+	if (group->state == MCAST_IDLE) {
+		group->state = MCAST_BUSY;
+		atomic_inc(&group->refcount);
+		queue_work(mcast_wq, &group->work);
+	}
+	spin_unlock_irqrestore(&group->lock, flags);
+}
+
+/*
+ * A multicast group has four types of members: full member, non member,
+ * sendonly non member and sendonly full member.
+ * We need to keep track of the number of members of each
+ * type based on their join state. Adjust the number of members that belong to
+ * the specified join states.
+ */
+static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
+{
+	int i;
+
+	for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1)
+		if (join_state & 0x1)
+			group->members[i] += inc;
+}
+
+/*
+ * If a multicast group has zero members left for a particular join state, but
+ * the group is still a member with the SA, we need to leave that join state.
+ * Determine which join states we still belong to, but that do not have any
+ * active members.
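+ * The join state is a bit mask (0x1 full member, 0x2 non-member,
+ * 0x4 send-only non-member, 0x8 send-only full member), with one
+ * counter per bit.  Worked example: with rec.join_state == 0x5 and
+ * members[] == { 2, 0, 0, 0 }, the group still has full members but
+ * no send-only non-members, so get_leave_state() below returns 0x4
+ * and only that join state is dropped at the SA.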
+ */ +static u8 get_leave_state(struct mcast_group *group) +{ + u8 leave_state = 0; + int i; + + for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++) + if (!group->members[i]) + leave_state |= (0x1 << i); + + return leave_state & group->rec.join_state; +} + +static int check_selector(ib_sa_comp_mask comp_mask, + ib_sa_comp_mask selector_mask, + ib_sa_comp_mask value_mask, + u8 selector, u8 src_value, u8 dst_value) +{ + int err; + + if (!(comp_mask & selector_mask) || !(comp_mask & value_mask)) + return 0; + + switch (selector) { + case IB_SA_GT: + err = (src_value <= dst_value); + break; + case IB_SA_LT: + err = (src_value >= dst_value); + break; + case IB_SA_EQ: + err = (src_value != dst_value); + break; + default: + err = 0; + break; + } + + return err; +} + +static int cmp_rec(struct ib_sa_mcmember_rec *src, + struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask) +{ + /* MGID must already match */ + + if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID && + memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid)) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid) + return -EINVAL; + if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR, + IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector, + src->mtu, dst->mtu)) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS && + src->traffic_class != dst->traffic_class) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey) + return -EINVAL; + if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR, + IB_SA_MCMEMBER_REC_RATE, dst->rate_selector, + src->rate, dst->rate)) + return -EINVAL; + if (check_selector(comp_mask, + IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR, + IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME, + dst->packet_life_time_selector, + src->packet_life_time, dst->packet_life_time)) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL && + src->flow_label != dst->flow_label) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT && + src->hop_limit != dst->hop_limit) + return -EINVAL; + if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope) + return -EINVAL; + + /* join_state checked separately, proxy_join ignored */ + + return 0; +} + +static int send_join(struct mcast_group *group, struct mcast_member *member) +{ + struct mcast_port *port = group->port; + int ret; + + group->last_join = member; + ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, + port->port_num, IB_MGMT_METHOD_SET, + &member->multicast.rec, + member->multicast.comp_mask, + 3000, GFP_KERNEL, join_handler, group, + &group->query); + return (ret > 0) ? 0 : ret; +} + +static int send_leave(struct mcast_group *group, u8 leave_state) +{ + struct mcast_port *port = group->port; + struct ib_sa_mcmember_rec rec; + int ret; + + rec = group->rec; + rec.join_state = leave_state; + group->leave_state = leave_state; + + ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, + port->port_num, IB_SA_METHOD_DELETE, &rec, + IB_SA_MCMEMBER_REC_MGID | + IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_JOIN_STATE, + 3000, GFP_KERNEL, leave_handler, + group, &group->query); + return (ret > 0) ? 
0 : ret; +} + +static void join_group(struct mcast_group *group, struct mcast_member *member, + u8 join_state) +{ + member->state = MCAST_MEMBER; + adjust_membership(group, join_state, 1); + group->rec.join_state |= join_state; + member->multicast.rec = group->rec; + member->multicast.rec.join_state = join_state; + list_move(&member->list, &group->active_list); +} + +static int fail_join(struct mcast_group *group, struct mcast_member *member, + int status) +{ + spin_lock_irq(&group->lock); + list_del_init(&member->list); + spin_unlock_irq(&group->lock); + return member->multicast.callback(status, &member->multicast); +} + +static void process_group_error(struct mcast_group *group) +{ + struct mcast_member *member; + int ret = 0; + u16 pkey_index; + + if (group->state == MCAST_PKEY_EVENT) + ret = ib_find_pkey(group->port->dev->device, + group->port->port_num, + be16_to_cpu(group->rec.pkey), &pkey_index); + + spin_lock_irq(&group->lock); + if (group->state == MCAST_PKEY_EVENT && !ret && + group->pkey_index == pkey_index) + goto out; + + while (!list_empty(&group->active_list)) { + member = list_entry(group->active_list.next, + struct mcast_member, list); + atomic_inc(&member->refcount); + list_del_init(&member->list); + adjust_membership(group, member->multicast.rec.join_state, -1); + member->state = MCAST_ERROR; + spin_unlock_irq(&group->lock); + + ret = member->multicast.callback(-ENETRESET, + &member->multicast); + deref_member(member); + if (ret) + ib_sa_free_multicast(&member->multicast); + spin_lock_irq(&group->lock); + } + + group->rec.join_state = 0; +out: + group->state = MCAST_BUSY; + spin_unlock_irq(&group->lock); +} + +static void mcast_work_handler(struct work_struct *work) +{ + struct mcast_group *group; + struct mcast_member *member; + struct ib_sa_multicast *multicast; + int status, ret; + u8 join_state; + + group = container_of(work, typeof(*group), work); +retest: + spin_lock_irq(&group->lock); + while (!list_empty(&group->pending_list) || + (group->state != MCAST_BUSY)) { + + if (group->state != MCAST_BUSY) { + spin_unlock_irq(&group->lock); + process_group_error(group); + goto retest; + } + + member = list_entry(group->pending_list.next, + struct mcast_member, list); + multicast = &member->multicast; + join_state = multicast->rec.join_state; + atomic_inc(&member->refcount); + + if (join_state == (group->rec.join_state & join_state)) { + status = cmp_rec(&group->rec, &multicast->rec, + multicast->comp_mask); + if (!status) + join_group(group, member, join_state); + else + list_del_init(&member->list); + spin_unlock_irq(&group->lock); + ret = multicast->callback(status, multicast); + } else { + spin_unlock_irq(&group->lock); + status = send_join(group, member); + if (!status) { + deref_member(member); + return; + } + ret = fail_join(group, member, status); + } + + deref_member(member); + if (ret) + ib_sa_free_multicast(&member->multicast); + spin_lock_irq(&group->lock); + } + + join_state = get_leave_state(group); + if (join_state) { + group->rec.join_state &= ~join_state; + spin_unlock_irq(&group->lock); + if (send_leave(group, join_state)) + goto retest; + } else { + group->state = MCAST_IDLE; + spin_unlock_irq(&group->lock); + release_group(group); + } +} + +/* + * Fail a join request if it is still active - at the head of the pending queue. 
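+ * If the failed request is no longer at the head (it was freed and
+ * another join took its place), the error is dropped here and the work
+ * handler, which runs next, simply restarts processing of the queue.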
+ */ +static void process_join_error(struct mcast_group *group, int status) +{ + struct mcast_member *member; + int ret; + + spin_lock_irq(&group->lock); + member = list_entry(group->pending_list.next, + struct mcast_member, list); + if (group->last_join == member) { + atomic_inc(&member->refcount); + list_del_init(&member->list); + spin_unlock_irq(&group->lock); + ret = member->multicast.callback(status, &member->multicast); + deref_member(member); + if (ret) + ib_sa_free_multicast(&member->multicast); + } else + spin_unlock_irq(&group->lock); +} + +static void join_handler(int status, struct ib_sa_mcmember_rec *rec, + void *context) +{ + struct mcast_group *group = context; + u16 pkey_index = MCAST_INVALID_PKEY_INDEX; + + if (status) + process_join_error(group, status); + else { + int mgids_changed, is_mgid0; + ib_find_pkey(group->port->dev->device, group->port->port_num, + be16_to_cpu(rec->pkey), &pkey_index); + + spin_lock_irq(&group->port->lock); + if (group->state == MCAST_BUSY && + group->pkey_index == MCAST_INVALID_PKEY_INDEX) + group->pkey_index = pkey_index; + mgids_changed = memcmp(&rec->mgid, &group->rec.mgid, + sizeof(group->rec.mgid)); + group->rec = *rec; + if (mgids_changed) { + rb_erase(&group->node, &group->port->table); + is_mgid0 = !memcmp(&mgid0, &group->rec.mgid, + sizeof(mgid0)); + mcast_insert(group->port, group, is_mgid0); + } + spin_unlock_irq(&group->port->lock); + } + mcast_work_handler(&group->work); +} + +static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, + void *context) +{ + struct mcast_group *group = context; + + if (status && group->retries > 0 && + !send_leave(group, group->leave_state)) + group->retries--; + else + mcast_work_handler(&group->work); +} + +static struct mcast_group *acquire_group(struct mcast_port *port, + union ib_gid *mgid, gfp_t gfp_mask) +{ + struct mcast_group *group, *cur_group; + unsigned long flags; + int is_mgid0; + + is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0); + if (!is_mgid0) { + spin_lock_irqsave(&port->lock, flags); + group = mcast_find(port, mgid); + if (group) + goto found; + spin_unlock_irqrestore(&port->lock, flags); + } + + group = kzalloc(sizeof *group, gfp_mask); + if (!group) + return NULL; + + group->retries = 3; + group->port = port; + group->rec.mgid = *mgid; + group->pkey_index = MCAST_INVALID_PKEY_INDEX; + INIT_LIST_HEAD(&group->pending_list); + INIT_LIST_HEAD(&group->active_list); + INIT_WORK(&group->work, mcast_work_handler); + spin_lock_init(&group->lock); + + spin_lock_irqsave(&port->lock, flags); + cur_group = mcast_insert(port, group, is_mgid0); + if (cur_group) { + kfree(group); + group = cur_group; + } else + atomic_inc(&port->refcount); +found: + atomic_inc(&group->refcount); + spin_unlock_irqrestore(&port->lock, flags); + return group; +} + +/* + * We serialize all join requests to a single group to make our lives much + * easier. Otherwise, two users could try to join the same group + * simultaneously, with different configurations, one could leave while the + * join is in progress, etc., which makes locking around error recovery + * difficult. 
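+ *
+ * Sketch of a typical caller (my_sa_client must already be registered
+ * with ib_sa_register_client(); my_callback and my_context stand in
+ * for the caller's own completion hook and cookie):
+ *
+ *	mc = ib_sa_join_multicast(&my_sa_client, device, port_num, &rec,
+ *				  IB_SA_MCMEMBER_REC_MGID |
+ *				  IB_SA_MCMEMBER_REC_PORT_GID |
+ *				  IB_SA_MCMEMBER_REC_JOIN_STATE,
+ *				  GFP_KERNEL, my_callback, my_context);
+ *	if (IS_ERR(mc))
+ *		return PTR_ERR(mc);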
+ */ +struct ib_sa_multicast * +ib_sa_join_multicast(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, gfp_t gfp_mask, + int (*callback)(int status, + struct ib_sa_multicast *multicast), + void *context) +{ + struct mcast_device *dev; + struct mcast_member *member; + struct ib_sa_multicast *multicast; + int ret; + + dev = ib_get_client_data(device, &mcast_client); + if (!dev) + return ERR_PTR(-ENODEV); + + member = kmalloc(sizeof *member, gfp_mask); + if (!member) + return ERR_PTR(-ENOMEM); + + ib_sa_client_get(client); + member->client = client; + member->multicast.rec = *rec; + member->multicast.comp_mask = comp_mask; + member->multicast.callback = callback; + member->multicast.context = context; + init_completion(&member->comp); + atomic_set(&member->refcount, 1); + member->state = MCAST_JOINING; + + member->group = acquire_group(&dev->port[port_num - dev->start_port], + &rec->mgid, gfp_mask); + if (!member->group) { + ret = -ENOMEM; + goto err; + } + + /* + * The user will get the multicast structure in their callback. They + * could then free the multicast structure before we can return from + * this routine. So we save the pointer to return before queuing + * any callback. + */ + multicast = &member->multicast; + queue_join(member); + return multicast; + +err: + ib_sa_client_put(client); + kfree(member); + return ERR_PTR(ret); +} +EXPORT_SYMBOL(ib_sa_join_multicast); + +void ib_sa_free_multicast(struct ib_sa_multicast *multicast) +{ + struct mcast_member *member; + struct mcast_group *group; + + member = container_of(multicast, struct mcast_member, multicast); + group = member->group; + + spin_lock_irq(&group->lock); + if (member->state == MCAST_MEMBER) + adjust_membership(group, multicast->rec.join_state, -1); + + list_del_init(&member->list); + + if (group->state == MCAST_IDLE) { + group->state = MCAST_BUSY; + spin_unlock_irq(&group->lock); + /* Continue to hold reference on group until callback */ + queue_work(mcast_wq, &group->work); + } else { + spin_unlock_irq(&group->lock); + release_group(group); + } + + deref_member(member); + wait_for_completion(&member->comp); + ib_sa_client_put(member->client); + kfree(member); +} +EXPORT_SYMBOL(ib_sa_free_multicast); + +int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, + union ib_gid *mgid, struct ib_sa_mcmember_rec *rec) +{ + struct mcast_device *dev; + struct mcast_port *port; + struct mcast_group *group; + unsigned long flags; + int ret = 0; + + dev = ib_get_client_data(device, &mcast_client); + if (!dev) + return -ENODEV; + + port = &dev->port[port_num - dev->start_port]; + spin_lock_irqsave(&port->lock, flags); + group = mcast_find(port, mgid); + if (group) + *rec = group->rec; + else + ret = -EADDRNOTAVAIL; + spin_unlock_irqrestore(&port->lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_sa_get_mcmember_rec); + +int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, + struct ib_sa_mcmember_rec *rec, + struct net_device *ndev, + enum ib_gid_type gid_type, + struct ib_ah_attr *ah_attr) +{ + int ret; + u16 gid_index; + u8 p; + + if (rdma_protocol_roce(device, port_num)) { + ret = ib_find_cached_gid_by_port(device, &rec->port_gid, + gid_type, port_num, + ndev, + &gid_index); + } else if (rdma_protocol_ib(device, port_num)) { + ret = ib_find_cached_gid(device, &rec->port_gid, + IB_GID_TYPE_IB, NULL, &p, + &gid_index); + } else { + ret = -EINVAL; + } + + if (ret) + return ret; + + memset(ah_attr, 0, sizeof *ah_attr); + 
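+	/*
+	 * Build a LID-routed address handle for the group: the DLID is
+	 * the group's MLID, and the GRH, mandatory for multicast,
+	 * carries the MGID.
+	 */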
ah_attr->dlid = be16_to_cpu(rec->mlid); + ah_attr->sl = rec->sl; + ah_attr->port_num = port_num; + ah_attr->static_rate = rec->rate; + + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.dgid = rec->mgid; + + ah_attr->grh.sgid_index = (u8) gid_index; + ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); + ah_attr->grh.hop_limit = rec->hop_limit; + ah_attr->grh.traffic_class = rec->traffic_class; + + return 0; +} +EXPORT_SYMBOL(ib_init_ah_from_mcmember); + +static void mcast_groups_event(struct mcast_port *port, + enum mcast_group_state state) +{ + struct mcast_group *group; + struct rb_node *node; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + for (node = rb_first(&port->table); node; node = rb_next(node)) { + group = rb_entry(node, struct mcast_group, node); + spin_lock(&group->lock); + if (group->state == MCAST_IDLE) { + atomic_inc(&group->refcount); + queue_work(mcast_wq, &group->work); + } + if (group->state != MCAST_GROUP_ERROR) + group->state = state; + spin_unlock(&group->lock); + } + spin_unlock_irqrestore(&port->lock, flags); +} + +static void mcast_event_handler(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct mcast_device *dev; + int index; + + dev = container_of(handler, struct mcast_device, event_handler); + if (!rdma_cap_ib_mcast(dev->device, event->element.port_num)) + return; + + index = event->element.port_num - dev->start_port; + + switch (event->event) { + case IB_EVENT_PORT_ERR: + case IB_EVENT_LID_CHANGE: + case IB_EVENT_SM_CHANGE: + case IB_EVENT_CLIENT_REREGISTER: + mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR); + break; + case IB_EVENT_PKEY_CHANGE: + mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT); + break; + default: + break; + } +} + +static void mcast_add_one(struct ib_device *device) +{ + struct mcast_device *dev; + struct mcast_port *port; + int i; + int count = 0; + + dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port, + GFP_KERNEL); + if (!dev) + return; + + dev->start_port = rdma_start_port(device); + dev->end_port = rdma_end_port(device); + + for (i = 0; i <= dev->end_port - dev->start_port; i++) { + if (!rdma_cap_ib_mcast(device, dev->start_port + i)) + continue; + port = &dev->port[i]; + port->dev = dev; + port->port_num = dev->start_port + i; + spin_lock_init(&port->lock); + port->table = RB_ROOT; + init_completion(&port->comp); + atomic_set(&port->refcount, 1); + ++count; + } + + if (!count) { + kfree(dev); + return; + } + + dev->device = device; + ib_set_client_data(device, &mcast_client, dev); + + INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler); + ib_register_event_handler(&dev->event_handler); +} + +static void mcast_remove_one(struct ib_device *device, void *client_data) +{ + struct mcast_device *dev = client_data; + struct mcast_port *port; + int i; + + if (!dev) + return; + + ib_unregister_event_handler(&dev->event_handler); + flush_workqueue(mcast_wq); + + for (i = 0; i <= dev->end_port - dev->start_port; i++) { + if (rdma_cap_ib_mcast(device, dev->start_port + i)) { + port = &dev->port[i]; + deref_port(port); + wait_for_completion(&port->comp); + } + } + + kfree(dev); +} + +int mcast_init(void) +{ + int ret; + + mcast_wq = alloc_ordered_workqueue("ib_mcast", WQ_MEM_RECLAIM); + if (!mcast_wq) + return -ENOMEM; + + ib_sa_register_client(&sa_client); + + ret = ib_register_client(&mcast_client); + if (ret) + goto err; + return 0; + +err: + ib_sa_unregister_client(&sa_client); + destroy_workqueue(mcast_wq); + return ret; +} + +void mcast_cleanup(void) 
+{ + ib_unregister_client(&mcast_client); + ib_sa_unregister_client(&sa_client); + destroy_workqueue(mcast_wq); +} Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_multicast.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_packer.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_packer.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_packer.c (revision 320592) @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include + +static u64 value_read(int offset, int size, void *structure) +{ + switch (size) { + case 1: return *(u8 *) ((char *)structure + offset); + case 2: return be16_to_cpup((__be16 *) ((char *)structure + offset)); + case 4: return be32_to_cpup((__be32 *) ((char *)structure + offset)); + case 8: return be64_to_cpup((__be64 *) ((char *)structure + offset)); + default: + pr_warn("Field size %d bits not handled\n", size * 8); + return 0; + } +} + +/** + * ib_pack - Pack a structure into a buffer + * @desc:Array of structure field descriptions + * @desc_len:Number of entries in @desc + * @structure:Structure to pack from + * @buf:Buffer to pack into + * + * ib_pack() packs a list of structure fields into a buffer, + * controlled by the array of fields in @desc. 
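+ * Each entry of @desc maps one structure member (struct_offset_bytes,
+ * struct_size_bytes) onto a bit range of @buf (offset_words,
+ * offset_bits, size_bits); entries wider than 64 bits are copied byte
+ * for byte, and RESERVED entries are packed as zero.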
+ */
+void ib_pack(const struct ib_field *desc,
+	     int desc_len,
+	     void *structure,
+	     void *buf)
+{
+	int i;
+
+	for (i = 0; i < desc_len; ++i) {
+		if (desc[i].size_bits <= 32) {
+			int shift;
+			u32 val;
+			__be32 mask;
+			__be32 *addr;
+
+			shift = 32 - desc[i].offset_bits - desc[i].size_bits;
+			if (desc[i].struct_size_bytes)
+				val = value_read(desc[i].struct_offset_bytes,
+						 desc[i].struct_size_bytes,
+						 structure) << shift;
+			else
+				val = 0;
+
+			mask = cpu_to_be32(((1ull << desc[i].size_bits) - 1) << shift);
+			addr = (__be32 *) buf + desc[i].offset_words;
+			*addr = (*addr & ~mask) | (cpu_to_be32(val) & mask);
+		} else if (desc[i].size_bits <= 64) {
+			int shift;
+			u64 val;
+			__be64 mask;
+			__be64 *addr;
+
+			shift = 64 - desc[i].offset_bits - desc[i].size_bits;
+			if (desc[i].struct_size_bytes)
+				val = value_read(desc[i].struct_offset_bytes,
+						 desc[i].struct_size_bytes,
+						 structure) << shift;
+			else
+				val = 0;
+
+			mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift);
+			addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words);
+			*addr = (*addr & ~mask) | (cpu_to_be64(val) & mask);
+		} else {
+			if (desc[i].offset_bits % 8 ||
+			    desc[i].size_bits % 8) {
+				pr_warn("Structure field %s of size %d bits is not byte-aligned\n",
+					desc[i].field_name, desc[i].size_bits);
+			}
+
+			if (desc[i].struct_size_bytes)
+				memcpy((char *)buf + desc[i].offset_words * 4 +
+				       desc[i].offset_bits / 8,
+				       (char *)structure + desc[i].struct_offset_bytes,
+				       desc[i].size_bits / 8);
+			else
+				memset((char *)buf + desc[i].offset_words * 4 +
+				       desc[i].offset_bits / 8,
+				       0,
+				       desc[i].size_bits / 8);
+		}
+	}
+}
+EXPORT_SYMBOL(ib_pack);
+
+static void value_write(int offset, int size, u64 val, void *structure)
+{
+	switch (size * 8) {
+	case 8:  *(    u8 *) ((char *)structure + offset) = val; break;
+	case 16: *(__be16 *) ((char *)structure + offset) = cpu_to_be16(val); break;
+	case 32: *(__be32 *) ((char *)structure + offset) = cpu_to_be32(val); break;
+	case 64: *(__be64 *) ((char *)structure + offset) = cpu_to_be64(val); break;
+	default:
+		pr_warn("Field size %d bits not handled\n", size * 8);
+	}
+}
+
+/**
+ * ib_unpack - Unpack a buffer into a structure
+ * @desc:Array of structure field descriptions
+ * @desc_len:Number of entries in @desc
+ * @buf:Buffer to unpack from
+ * @structure:Structure to unpack into
+ *
+ * ib_unpack() unpacks a list of structure fields from a buffer,
+ * controlled by the array of fields in @desc.
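+ * RESERVED entries (struct_size_bytes == 0) are skipped, so reserved
+ * bits in @buf are never copied into @structure.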
+ */ +void ib_unpack(const struct ib_field *desc, + int desc_len, + void *buf, + void *structure) +{ + int i; + + for (i = 0; i < desc_len; ++i) { + if (!desc[i].struct_size_bytes) + continue; + + if (desc[i].size_bits <= 32) { + int shift; + u32 val; + u32 mask; + __be32 *addr; + + shift = 32 - desc[i].offset_bits - desc[i].size_bits; + mask = ((1ull << desc[i].size_bits) - 1) << shift; + addr = (__be32 *) buf + desc[i].offset_words; + val = (be32_to_cpup(addr) & mask) >> shift; + value_write(desc[i].struct_offset_bytes, + desc[i].struct_size_bytes, + val, + structure); + } else if (desc[i].size_bits <= 64) { + int shift; + u64 val; + u64 mask; + __be64 *addr; + + shift = 64 - desc[i].offset_bits - desc[i].size_bits; + mask = (~0ull >> (64 - desc[i].size_bits)) << shift; + addr = (__be64 *) buf + desc[i].offset_words; + val = (be64_to_cpup(addr) & mask) >> shift; + value_write(desc[i].struct_offset_bytes, + desc[i].struct_size_bytes, + val, + structure); + } else { + if (desc[i].offset_bits % 8 || + desc[i].size_bits % 8) { + pr_warn("Structure field %s of size %d bits is not byte-aligned\n", + desc[i].field_name, desc[i].size_bits); + } + + memcpy((char *)structure + desc[i].struct_offset_bytes, + (char *)buf + desc[i].offset_words * 4 + + desc[i].offset_bits / 8, + desc[i].size_bits / 8); + } + } +} +EXPORT_SYMBOL(ib_unpack); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_packer.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_sa_query.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_sa_query.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_sa_query.c (revision 320592) @@ -0,0 +1,1580 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2006 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sa.h" +#include "core_priv.h" + +#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100 +#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000 +#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000 + +struct ib_sa_sm_ah { + struct ib_ah *ah; + struct kref ref; + u16 pkey_index; + u8 src_path_mask; +}; + +struct ib_sa_classport_cache { + bool valid; + struct ib_class_port_info data; +}; + +struct ib_sa_port { + struct ib_mad_agent *agent; + struct ib_sa_sm_ah *sm_ah; + struct work_struct update_task; + struct ib_sa_classport_cache classport_info; + spinlock_t classport_lock; /* protects class port info set */ + spinlock_t ah_lock; + u8 port_num; +}; + +struct ib_sa_device { + int start_port, end_port; + struct ib_event_handler event_handler; + struct ib_sa_port port[0]; +}; + +struct ib_sa_query { + void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); + void (*release)(struct ib_sa_query *); + struct ib_sa_client *client; + struct ib_sa_port *port; + struct ib_mad_send_buf *mad_buf; + struct ib_sa_sm_ah *sm_ah; + int id; + u32 flags; + struct list_head list; /* Local svc request list */ + u32 seq; /* Local svc request sequence number */ + unsigned long timeout; /* Local svc timeout */ + u8 path_use; /* How will the pathrecord be used */ +}; + +#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 +#define IB_SA_CANCEL 0x00000002 + +struct ib_sa_service_query { + void (*callback)(int, struct ib_sa_service_rec *, void *); + void *context; + struct ib_sa_query sa_query; +}; + +struct ib_sa_path_query { + void (*callback)(int, struct ib_sa_path_rec *, void *); + void *context; + struct ib_sa_query sa_query; +}; + +struct ib_sa_guidinfo_query { + void (*callback)(int, struct ib_sa_guidinfo_rec *, void *); + void *context; + struct ib_sa_query sa_query; +}; + +struct ib_sa_classport_info_query { + void (*callback)(int, struct ib_class_port_info *, void *); + void *context; + struct ib_sa_query sa_query; +}; + +struct ib_sa_mcmember_query { + void (*callback)(int, struct ib_sa_mcmember_rec *, void *); + void *context; + struct ib_sa_query sa_query; +}; + +static void ib_sa_add_one(struct ib_device *device); +static void ib_sa_remove_one(struct ib_device *device, void *client_data); + +static struct ib_client sa_client = { + .name = "sa", + .add = ib_sa_add_one, + .remove = ib_sa_remove_one +}; + +static DEFINE_SPINLOCK(idr_lock); +static DEFINE_IDR(query_idr); + +static DEFINE_SPINLOCK(tid_lock); +static u32 tid; + +#define PATH_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \ + .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \ + .field_name = "sa_path_rec:" #field + +static const struct ib_field path_rec_table[] = { + { PATH_REC_FIELD(service_id), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 64 }, + { PATH_REC_FIELD(dgid), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 128 }, + { PATH_REC_FIELD(sgid), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 128 }, + { PATH_REC_FIELD(dlid), + .offset_words = 10, + .offset_bits = 0, + .size_bits = 16 }, + { PATH_REC_FIELD(slid), + .offset_words = 10, + .offset_bits = 16, 
+ .size_bits = 16 }, + { PATH_REC_FIELD(raw_traffic), + .offset_words = 11, + .offset_bits = 0, + .size_bits = 1 }, + { RESERVED, + .offset_words = 11, + .offset_bits = 1, + .size_bits = 3 }, + { PATH_REC_FIELD(flow_label), + .offset_words = 11, + .offset_bits = 4, + .size_bits = 20 }, + { PATH_REC_FIELD(hop_limit), + .offset_words = 11, + .offset_bits = 24, + .size_bits = 8 }, + { PATH_REC_FIELD(traffic_class), + .offset_words = 12, + .offset_bits = 0, + .size_bits = 8 }, + { PATH_REC_FIELD(reversible), + .offset_words = 12, + .offset_bits = 8, + .size_bits = 1 }, + { PATH_REC_FIELD(numb_path), + .offset_words = 12, + .offset_bits = 9, + .size_bits = 7 }, + { PATH_REC_FIELD(pkey), + .offset_words = 12, + .offset_bits = 16, + .size_bits = 16 }, + { PATH_REC_FIELD(qos_class), + .offset_words = 13, + .offset_bits = 0, + .size_bits = 12 }, + { PATH_REC_FIELD(sl), + .offset_words = 13, + .offset_bits = 12, + .size_bits = 4 }, + { PATH_REC_FIELD(mtu_selector), + .offset_words = 13, + .offset_bits = 16, + .size_bits = 2 }, + { PATH_REC_FIELD(mtu), + .offset_words = 13, + .offset_bits = 18, + .size_bits = 6 }, + { PATH_REC_FIELD(rate_selector), + .offset_words = 13, + .offset_bits = 24, + .size_bits = 2 }, + { PATH_REC_FIELD(rate), + .offset_words = 13, + .offset_bits = 26, + .size_bits = 6 }, + { PATH_REC_FIELD(packet_life_time_selector), + .offset_words = 14, + .offset_bits = 0, + .size_bits = 2 }, + { PATH_REC_FIELD(packet_life_time), + .offset_words = 14, + .offset_bits = 2, + .size_bits = 6 }, + { PATH_REC_FIELD(preference), + .offset_words = 14, + .offset_bits = 8, + .size_bits = 8 }, + { RESERVED, + .offset_words = 14, + .offset_bits = 16, + .size_bits = 48 }, +}; + +#define MCMEMBER_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \ + .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \ + .field_name = "sa_mcmember_rec:" #field + +static const struct ib_field mcmember_rec_table[] = { + { MCMEMBER_REC_FIELD(mgid), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 128 }, + { MCMEMBER_REC_FIELD(port_gid), + .offset_words = 4, + .offset_bits = 0, + .size_bits = 128 }, + { MCMEMBER_REC_FIELD(qkey), + .offset_words = 8, + .offset_bits = 0, + .size_bits = 32 }, + { MCMEMBER_REC_FIELD(mlid), + .offset_words = 9, + .offset_bits = 0, + .size_bits = 16 }, + { MCMEMBER_REC_FIELD(mtu_selector), + .offset_words = 9, + .offset_bits = 16, + .size_bits = 2 }, + { MCMEMBER_REC_FIELD(mtu), + .offset_words = 9, + .offset_bits = 18, + .size_bits = 6 }, + { MCMEMBER_REC_FIELD(traffic_class), + .offset_words = 9, + .offset_bits = 24, + .size_bits = 8 }, + { MCMEMBER_REC_FIELD(pkey), + .offset_words = 10, + .offset_bits = 0, + .size_bits = 16 }, + { MCMEMBER_REC_FIELD(rate_selector), + .offset_words = 10, + .offset_bits = 16, + .size_bits = 2 }, + { MCMEMBER_REC_FIELD(rate), + .offset_words = 10, + .offset_bits = 18, + .size_bits = 6 }, + { MCMEMBER_REC_FIELD(packet_life_time_selector), + .offset_words = 10, + .offset_bits = 24, + .size_bits = 2 }, + { MCMEMBER_REC_FIELD(packet_life_time), + .offset_words = 10, + .offset_bits = 26, + .size_bits = 6 }, + { MCMEMBER_REC_FIELD(sl), + .offset_words = 11, + .offset_bits = 0, + .size_bits = 4 }, + { MCMEMBER_REC_FIELD(flow_label), + .offset_words = 11, + .offset_bits = 4, + .size_bits = 20 }, + { MCMEMBER_REC_FIELD(hop_limit), + .offset_words = 11, + .offset_bits = 24, + .size_bits = 8 }, + { MCMEMBER_REC_FIELD(scope), + .offset_words = 12, + .offset_bits = 0, + .size_bits = 4 }, + { 
MCMEMBER_REC_FIELD(join_state), + .offset_words = 12, + .offset_bits = 4, + .size_bits = 4 }, + { MCMEMBER_REC_FIELD(proxy_join), + .offset_words = 12, + .offset_bits = 8, + .size_bits = 1 }, + { RESERVED, + .offset_words = 12, + .offset_bits = 9, + .size_bits = 23 }, +}; + +#define SERVICE_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \ + .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \ + .field_name = "sa_service_rec:" #field + +static const struct ib_field service_rec_table[] = { + { SERVICE_REC_FIELD(id), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 64 }, + { SERVICE_REC_FIELD(gid), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 128 }, + { SERVICE_REC_FIELD(pkey), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 16 }, + { SERVICE_REC_FIELD(lease), + .offset_words = 7, + .offset_bits = 0, + .size_bits = 32 }, + { SERVICE_REC_FIELD(key), + .offset_words = 8, + .offset_bits = 0, + .size_bits = 128 }, + { SERVICE_REC_FIELD(name), + .offset_words = 12, + .offset_bits = 0, + .size_bits = 64*8 }, + { SERVICE_REC_FIELD(data8), + .offset_words = 28, + .offset_bits = 0, + .size_bits = 16*8 }, + { SERVICE_REC_FIELD(data16), + .offset_words = 32, + .offset_bits = 0, + .size_bits = 8*16 }, + { SERVICE_REC_FIELD(data32), + .offset_words = 36, + .offset_bits = 0, + .size_bits = 4*32 }, + { SERVICE_REC_FIELD(data64), + .offset_words = 40, + .offset_bits = 0, + .size_bits = 2*64 }, +}; + +#define CLASSPORTINFO_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_class_port_info, field), \ + .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \ + .field_name = "ib_class_port_info:" #field + +static const struct ib_field classport_info_rec_table[] = { + { CLASSPORTINFO_REC_FIELD(base_version), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 8 }, + { CLASSPORTINFO_REC_FIELD(class_version), + .offset_words = 0, + .offset_bits = 8, + .size_bits = 8 }, + { CLASSPORTINFO_REC_FIELD(capability_mask), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 }, + { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 32 }, + { CLASSPORTINFO_REC_FIELD(redirect_gid), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 128 }, + { CLASSPORTINFO_REC_FIELD(redirect_tcslfl), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 32 }, + { CLASSPORTINFO_REC_FIELD(redirect_lid), + .offset_words = 7, + .offset_bits = 0, + .size_bits = 16 }, + { CLASSPORTINFO_REC_FIELD(redirect_pkey), + .offset_words = 7, + .offset_bits = 16, + .size_bits = 16 }, + + { CLASSPORTINFO_REC_FIELD(redirect_qp), + .offset_words = 8, + .offset_bits = 0, + .size_bits = 32 }, + { CLASSPORTINFO_REC_FIELD(redirect_qkey), + .offset_words = 9, + .offset_bits = 0, + .size_bits = 32 }, + + { CLASSPORTINFO_REC_FIELD(trap_gid), + .offset_words = 10, + .offset_bits = 0, + .size_bits = 128 }, + { CLASSPORTINFO_REC_FIELD(trap_tcslfl), + .offset_words = 14, + .offset_bits = 0, + .size_bits = 32 }, + + { CLASSPORTINFO_REC_FIELD(trap_lid), + .offset_words = 15, + .offset_bits = 0, + .size_bits = 16 }, + { CLASSPORTINFO_REC_FIELD(trap_pkey), + .offset_words = 15, + .offset_bits = 16, + .size_bits = 16 }, + + { CLASSPORTINFO_REC_FIELD(trap_hlqp), + .offset_words = 16, + .offset_bits = 0, + .size_bits = 32 }, + { CLASSPORTINFO_REC_FIELD(trap_qkey), + .offset_words = 17, + .offset_bits = 0, + .size_bits = 32 }, +}; + +#define GUIDINFO_REC_FIELD(field) \ + .struct_offset_bytes = 
offsetof(struct ib_sa_guidinfo_rec, field), \ + .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ + .field_name = "sa_guidinfo_rec:" #field + +static const struct ib_field guidinfo_rec_table[] = { + { GUIDINFO_REC_FIELD(lid), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 16 }, + { GUIDINFO_REC_FIELD(block_num), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 8 }, + { GUIDINFO_REC_FIELD(res1), + .offset_words = 0, + .offset_bits = 24, + .size_bits = 8 }, + { GUIDINFO_REC_FIELD(res2), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 32 }, + { GUIDINFO_REC_FIELD(guid_info_list), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 512 }, +}; + +static inline void ib_sa_disable_local_svc(struct ib_sa_query *query) +{ + query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE; +} + +static void free_sm_ah(struct kref *kref) +{ + struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); + + ib_destroy_ah(sm_ah->ah); + kfree(sm_ah); +} + +static void update_sm_ah(struct work_struct *work) +{ + struct ib_sa_port *port = + container_of(work, struct ib_sa_port, update_task); + struct ib_sa_sm_ah *new_ah; + struct ib_port_attr port_attr; + struct ib_ah_attr ah_attr; + + if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { + pr_warn("Couldn't query port\n"); + return; + } + + new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL); + if (!new_ah) { + return; + } + + kref_init(&new_ah->ref); + new_ah->src_path_mask = (1 << port_attr.lmc) - 1; + + new_ah->pkey_index = 0; + if (ib_find_pkey(port->agent->device, port->port_num, + IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index)) + pr_err("Couldn't find index for default PKey\n"); + + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = port_attr.sm_lid; + ah_attr.sl = port_attr.sm_sl; + ah_attr.port_num = port->port_num; + if (port_attr.grh_required) { + ah_attr.ah_flags = IB_AH_GRH; + ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix); + ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID); + } + + new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); + if (IS_ERR(new_ah->ah)) { + pr_warn("Couldn't create new SM AH\n"); + kfree(new_ah); + return; + } + + spin_lock_irq(&port->ah_lock); + if (port->sm_ah) + kref_put(&port->sm_ah->ref, free_sm_ah); + port->sm_ah = new_ah; + spin_unlock_irq(&port->ah_lock); + +} + +static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event) +{ + if (event->event == IB_EVENT_PORT_ERR || + event->event == IB_EVENT_PORT_ACTIVE || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_SM_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER) { + unsigned long flags; + struct ib_sa_device *sa_dev = + container_of(handler, typeof(*sa_dev), event_handler); + struct ib_sa_port *port = + &sa_dev->port[event->element.port_num - sa_dev->start_port]; + + if (!rdma_cap_ib_sa(handler->device, port->port_num)) + return; + + spin_lock_irqsave(&port->ah_lock, flags); + if (port->sm_ah) + kref_put(&port->sm_ah->ref, free_sm_ah); + port->sm_ah = NULL; + spin_unlock_irqrestore(&port->ah_lock, flags); + + if (event->event == IB_EVENT_SM_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER || + event->event == IB_EVENT_LID_CHANGE) { + spin_lock_irqsave(&port->classport_lock, flags); + port->classport_info.valid = false; + spin_unlock_irqrestore(&port->classport_lock, flags); + } + queue_work(ib_wq, &sa_dev->port[event->element.port_num - + 
sa_dev->start_port].update_task);
+	}
+}
+
+void ib_sa_register_client(struct ib_sa_client *client)
+{
+	atomic_set(&client->users, 1);
+	init_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_register_client);
+
+void ib_sa_unregister_client(struct ib_sa_client *client)
+{
+	ib_sa_client_put(client);
+	wait_for_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_unregister_client);
+
+/**
+ * ib_sa_cancel_query - try to cancel an SA query
+ * @id:ID of query to cancel
+ * @query:query pointer to cancel
+ *
+ * Try to cancel an SA query.  If the id and query don't match up or
+ * the query has already completed, nothing is done.  Otherwise the
+ * query is canceled and will complete with a status of -EINTR.
+ */
+void ib_sa_cancel_query(int id, struct ib_sa_query *query)
+{
+	unsigned long flags;
+	struct ib_mad_agent *agent;
+	struct ib_mad_send_buf *mad_buf;
+
+	spin_lock_irqsave(&idr_lock, flags);
+	if (idr_find(&query_idr, id) != query) {
+		spin_unlock_irqrestore(&idr_lock, flags);
+		return;
+	}
+	agent = query->port->agent;
+	mad_buf = query->mad_buf;
+	spin_unlock_irqrestore(&idr_lock, flags);
+	ib_cancel_mad(agent, mad_buf);
+}
+EXPORT_SYMBOL(ib_sa_cancel_query);
+
+static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
+{
+	struct ib_sa_device *sa_dev;
+	struct ib_sa_port *port;
+	unsigned long flags;
+	u8 src_path_mask;
+
+	sa_dev = ib_get_client_data(device, &sa_client);
+	if (!sa_dev)
+		return 0x7f;
+
+	port = &sa_dev->port[port_num - sa_dev->start_port];
+	spin_lock_irqsave(&port->ah_lock, flags);
+	src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
+	spin_unlock_irqrestore(&port->ah_lock, flags);
+
+	return src_path_mask;
+}
+
+int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
+			 struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
+{
+	int ret;
+	u16 gid_index;
+	int use_roce;
+	struct net_device *ndev = NULL;
+
+	memset(ah_attr, 0, sizeof *ah_attr);
+	ah_attr->dlid = be16_to_cpu(rec->dlid);
+	ah_attr->sl = rec->sl;
+	ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
+				 get_src_path_mask(device, port_num);
+	ah_attr->port_num = port_num;
+	ah_attr->static_rate = rec->rate;
+
+	use_roce = rdma_cap_eth_ah(device, port_num);
+
+	if (use_roce) {
+		struct net_device *idev;
+		struct net_device *resolved_dev;
+		struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
+						 .net = rec->net ?
rec->net : + &init_net}; + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } sgid_addr, dgid_addr; + + if (!device->get_netdev) + return -EOPNOTSUPP; + + rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid); + rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid); + + /* validate the route */ + ret = rdma_resolve_ip_route(&sgid_addr._sockaddr, + &dgid_addr._sockaddr, &dev_addr); + if (ret) + return ret; + + if ((dev_addr.network == RDMA_NETWORK_IPV4 || + dev_addr.network == RDMA_NETWORK_IPV6) && + rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) + return -EINVAL; + + idev = device->get_netdev(device, port_num); + if (!idev) + return -ENODEV; + + resolved_dev = dev_get_by_index(dev_addr.net, + dev_addr.bound_dev_if); + if (resolved_dev->if_flags & IFF_LOOPBACK) { + dev_put(resolved_dev); + resolved_dev = idev; + dev_hold(resolved_dev); + } + ndev = ib_get_ndev_from_path(rec); + rcu_read_lock(); + if ((ndev && ndev != resolved_dev) || + (resolved_dev != idev && + !rdma_is_upper_dev_rcu(idev, resolved_dev))) + ret = -EHOSTUNREACH; + rcu_read_unlock(); + dev_put(idev); + dev_put(resolved_dev); + if (ret) { + if (ndev) + dev_put(ndev); + return ret; + } + } + + if (rec->hop_limit > 0 || use_roce) { + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.dgid = rec->dgid; + + ret = ib_find_cached_gid_by_port(device, &rec->sgid, + rec->gid_type, port_num, ndev, + &gid_index); + if (ret) { + if (ndev) + dev_put(ndev); + return ret; + } + + ah_attr->grh.sgid_index = gid_index; + ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); + ah_attr->grh.hop_limit = rec->hop_limit; + ah_attr->grh.traffic_class = rec->traffic_class; + if (ndev) + dev_put(ndev); + } + + if (use_roce) + memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); + + return 0; +} +EXPORT_SYMBOL(ib_init_ah_from_path); + +static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) +{ + unsigned long flags; + + spin_lock_irqsave(&query->port->ah_lock, flags); + if (!query->port->sm_ah) { + spin_unlock_irqrestore(&query->port->ah_lock, flags); + return -EAGAIN; + } + kref_get(&query->port->sm_ah->ref); + query->sm_ah = query->port->sm_ah; + spin_unlock_irqrestore(&query->port->ah_lock, flags); + + query->mad_buf = ib_create_send_mad(query->port->agent, 1, + query->sm_ah->pkey_index, + 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, + gfp_mask, + IB_MGMT_BASE_VERSION); + if (IS_ERR(query->mad_buf)) { + kref_put(&query->sm_ah->ref, free_sm_ah); + return -ENOMEM; + } + + query->mad_buf->ah = query->sm_ah->ah; + + return 0; +} + +static void free_mad(struct ib_sa_query *query) +{ + ib_free_send_mad(query->mad_buf); + kref_put(&query->sm_ah->ref, free_sm_ah); +} + +static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) +{ + unsigned long flags; + + memset(mad, 0, sizeof *mad); + + mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + mad->mad_hdr.class_version = IB_SA_CLASS_VERSION; + + spin_lock_irqsave(&tid_lock, flags); + mad->mad_hdr.tid = + cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++); + spin_unlock_irqrestore(&tid_lock, flags); +} + +static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) +{ + bool preload = gfpflags_allow_blocking(gfp_mask); + unsigned long flags; + int ret, id; + + if (preload) + idr_preload(gfp_mask); + spin_lock_irqsave(&idr_lock, flags); + + id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT); + + spin_unlock_irqrestore(&idr_lock, flags); + if (preload) + idr_preload_end(); + if (id < 0) + return id; + 
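+	/*
+	 * Publish the IDR id and a back pointer to the query in the MAD
+	 * before posting, so send_handler()/recv_handler() can map a
+	 * completion back to this request.
+	 */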
+	query->mad_buf->timeout_ms = timeout_ms;
+	query->mad_buf->context[0] = query;
+	query->id = id;
+
+	if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
+		ib_sa_disable_local_svc(query);
+	}
+
+	ret = ib_post_send_mad(query->mad_buf, NULL);
+	if (ret) {
+		spin_lock_irqsave(&idr_lock, flags);
+		idr_remove(&query_idr, id);
+		spin_unlock_irqrestore(&idr_lock, flags);
+	}
+
+	/*
+	 * It's not safe to dereference query any more, because the
+	 * send may already have completed and freed the query in
+	 * another context.
+	 */
+	return ret ? ret : id;
+}
+
+void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
+{
+	ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
+}
+EXPORT_SYMBOL(ib_sa_unpack_path);
+
+void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
+{
+	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
+}
+EXPORT_SYMBOL(ib_sa_pack_path);
+
+static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
+				    int status,
+				    struct ib_sa_mad *mad)
+{
+	struct ib_sa_path_query *query =
+		container_of(sa_query, struct ib_sa_path_query, sa_query);
+
+	if (mad) {
+		struct ib_sa_path_rec rec;
+
+		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+			  mad->data, &rec);
+		rec.net = NULL;
+		rec.ifindex = 0;
+		rec.gid_type = IB_GID_TYPE_IB;
+		eth_zero_addr(rec.dmac);
+		query->callback(status, &rec, query->context);
+	} else
+		query->callback(status, NULL, query->context);
+}
+
+static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
+{
+	kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
+}
+
+/**
+ * ib_sa_path_rec_get - Start a Path get query
+ * @client:SA client
+ * @device:device to send query on
+ * @port_num: port number to send query on
+ * @rec:Path Record to send in query
+ * @comp_mask:component mask to send in query
+ * @timeout_ms:time to wait for response
+ * @gfp_mask:GFP mask to use for internal allocations
+ * @callback:function called when query completes, times out or is
+ * canceled
+ * @context:opaque user context passed to callback
+ * @sa_query:query context, used to cancel query
+ *
+ * Send a Path Record Get query to the SA to look up a path.  The
+ * callback function will be called when the query completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
+ * occurred sending the query.  The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_path_rec_get() is negative, it is an
+ * error code.  Otherwise it is a query ID that can be used to cancel
+ * the query.
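+ *
+ * Sketch of a minimal caller (local_gid, remote_gid, my_cb and my_ctx
+ * are the caller's own):
+ *
+ *	memset(&rec, 0, sizeof rec);
+ *	rec.sgid = local_gid;
+ *	rec.dgid = remote_gid;
+ *	rec.numb_path = 1;
+ *	id = ib_sa_path_rec_get(&my_sa_client, device, port_num, &rec,
+ *				IB_SA_PATH_REC_DGID |
+ *				IB_SA_PATH_REC_SGID |
+ *				IB_SA_PATH_REC_NUMB_PATH,
+ *				1000, GFP_KERNEL, my_cb, my_ctx, &query);
+ *	if (id < 0)
+ *		return id;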
+ */
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+		       struct ib_device *device, u8 port_num,
+		       struct ib_sa_path_rec *rec,
+		       ib_sa_comp_mask comp_mask,
+		       int timeout_ms, gfp_t gfp_mask,
+		       void (*callback)(int status,
+					struct ib_sa_path_rec *resp,
+					void *context),
+		       void *context,
+		       struct ib_sa_query **sa_query)
+{
+	struct ib_sa_path_query *query;
+	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+	struct ib_sa_port *port;
+	struct ib_mad_agent *agent;
+	struct ib_sa_mad *mad;
+	int ret;
+
+	if (!sa_dev)
+		return -ENODEV;
+
+	port = &sa_dev->port[port_num - sa_dev->start_port];
+	agent = port->agent;
+
+	query = kzalloc(sizeof(*query), gfp_mask);
+	if (!query)
+		return -ENOMEM;
+
+	query->sa_query.port = port;
+	ret = alloc_mad(&query->sa_query, gfp_mask);
+	if (ret)
+		goto err1;
+
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback = callback;
+	query->context = context;
+
+	mad = query->sa_query.mad_buf->mad;
+	init_mad(mad, agent);
+
+	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
+	query->sa_query.release = ib_sa_path_rec_release;
+	mad->mad_hdr.method = IB_MGMT_METHOD_GET;
+	mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
+	mad->sa_hdr.comp_mask = comp_mask;
+
+	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
+
+	*sa_query = &query->sa_query;
+
+	query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
+	query->sa_query.mad_buf->context[1] = rec;
+
+	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+	if (ret < 0)
+		goto err2;
+
+	return ret;
+
+err2:
+	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
+	free_mad(&query->sa_query);
+
+err1:
+	kfree(query);
+	return ret;
+}
+EXPORT_SYMBOL(ib_sa_path_rec_get);
+
+static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
+				       int status,
+				       struct ib_sa_mad *mad)
+{
+	struct ib_sa_service_query *query =
+		container_of(sa_query, struct ib_sa_service_query, sa_query);
+
+	if (mad) {
+		struct ib_sa_service_rec rec;
+
+		ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
+			  mad->data, &rec);
+		query->callback(status, &rec, query->context);
+	} else
+		query->callback(status, NULL, query->context);
+}
+
+static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
+{
+	kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
+}
+
+/**
+ * ib_sa_service_rec_query - Start Service Record operation
+ * @client:SA client
+ * @device:device to send request on
+ * @port_num: port number to send request on
+ * @method:SA method - should be get, set, or delete
+ * @rec:Service Record to send in request
+ * @comp_mask:component mask to send in request
+ * @timeout_ms:time to wait for response
+ * @gfp_mask:GFP mask to use for internal allocations
+ * @callback:function called when request completes, times out or is
+ * canceled
+ * @context:opaque user context passed to callback
+ * @sa_query:request context, used to cancel request
+ *
+ * Send a Service Record set/get/delete to the SA to register,
+ * unregister or query a service record.
+ * The callback function will be called when the request completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
+ * occurred sending the query.  The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_service_rec_query() is negative, it is an
+ * error code.
Otherwise it is a request ID that can be used to cancel + * the query. + */ +int ib_sa_service_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, u8 method, + struct ib_sa_service_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_service_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_service_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; + int ret; + + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_SA_METHOD_DELETE) + return -EINVAL; + + query = kzalloc(sizeof(*query), gfp_mask); + if (!query) + return -ENOMEM; + + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) + goto err1; + + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; + + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); + + query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; + query->sa_query.release = ib_sa_service_rec_release; + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); + mad->sa_hdr.comp_mask = comp_mask; + + ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), + rec, mad->data); + + *sa_query = &query->sa_query; + + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); + free_mad(&query->sa_query); + +err1: + kfree(query); + return ret; +} +EXPORT_SYMBOL(ib_sa_service_rec_query); + +static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_mcmember_query *query = + container_of(sa_query, struct ib_sa_mcmember_query, sa_query); + + if (mad) { + struct ib_sa_mcmember_rec rec; + + ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else + query->callback(status, NULL, query->context); +} + +static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) +{ + kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); +} + +int ib_sa_mcmember_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + u8 method, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_mcmember_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_mcmember_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; + int ret; + + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + + query = kzalloc(sizeof(*query), gfp_mask); + if (!query) + return -ENOMEM; + + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) + goto err1; + + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; + + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); + + 
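+	/*
+	 * method selects the SA operation: IB_MGMT_METHOD_SET joins a
+	 * group, IB_SA_METHOD_DELETE leaves it, and comp_mask names the
+	 * MCMemberRecord fields the SA must match.
+	 */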
query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; + query->sa_query.release = ib_sa_mcmember_rec_release; + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); + mad->sa_hdr.comp_mask = comp_mask; + + ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), + rec, mad->data); + + *sa_query = &query->sa_query; + + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); + free_mad(&query->sa_query); + +err1: + kfree(query); + return ret; +} + +/* Support GuidInfoRecord */ +static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_guidinfo_query *query = + container_of(sa_query, struct ib_sa_guidinfo_query, sa_query); + + if (mad) { + struct ib_sa_guidinfo_rec rec; + + ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else + query->callback(status, NULL, query->context); +} + +static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query) +{ + kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query)); +} + +int ib_sa_guid_info_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + struct ib_sa_guidinfo_rec *rec, + ib_sa_comp_mask comp_mask, u8 method, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_guidinfo_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_guidinfo_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; + int ret; + + if (!sa_dev) + return -ENODEV; + + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_SA_METHOD_DELETE) { + return -EINVAL; + } + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + + query = kzalloc(sizeof(*query), gfp_mask); + if (!query) + return -ENOMEM; + + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) + goto err1; + + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; + + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); + + query->sa_query.callback = callback ? 
ib_sa_guidinfo_rec_callback : NULL; + query->sa_query.release = ib_sa_guidinfo_rec_release; + + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC); + mad->sa_hdr.comp_mask = comp_mask; + + ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec, + mad->data); + + *sa_query = &query->sa_query; + + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); + free_mad(&query->sa_query); + +err1: + kfree(query); + return ret; +} +EXPORT_SYMBOL(ib_sa_guid_info_rec_query); + +/* Support get SA ClassPortInfo */ +static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + unsigned long flags; + struct ib_sa_classport_info_query *query = + container_of(sa_query, struct ib_sa_classport_info_query, sa_query); + + if (mad) { + struct ib_class_port_info rec; + + ib_unpack(classport_info_rec_table, + ARRAY_SIZE(classport_info_rec_table), + mad->data, &rec); + + spin_lock_irqsave(&sa_query->port->classport_lock, flags); + if (!status && !sa_query->port->classport_info.valid) { + memcpy(&sa_query->port->classport_info.data, &rec, + sizeof(sa_query->port->classport_info.data)); + + sa_query->port->classport_info.valid = true; + } + spin_unlock_irqrestore(&sa_query->port->classport_lock, flags); + + query->callback(status, &rec, query->context); + } else { + query->callback(status, NULL, query->context); + } +} + +static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query) +{ + kfree(container_of(sa_query, struct ib_sa_classport_info_query, + sa_query)); +} + +int ib_sa_classport_info_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_class_port_info *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_classport_info_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; + struct ib_class_port_info cached_class_port_info; + int ret; + unsigned long flags; + + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + + /* Use cached ClassPortInfo attribute if valid instead of sending mad */ + spin_lock_irqsave(&port->classport_lock, flags); + if (port->classport_info.valid && callback) { + memcpy(&cached_class_port_info, &port->classport_info.data, + sizeof(cached_class_port_info)); + spin_unlock_irqrestore(&port->classport_lock, flags); + callback(0, &cached_class_port_info, context); + return 0; + } + spin_unlock_irqrestore(&port->classport_lock, flags); + + query = kzalloc(sizeof(*query), gfp_mask); + if (!query) + return -ENOMEM; + + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) + goto err1; + + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; + + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); + + query->sa_query.callback = callback ? 
ib_sa_classport_info_rec_callback : NULL; + + query->sa_query.release = ib_sa_portclass_info_rec_release; + /* support GET only */ + mad->mad_hdr.method = IB_MGMT_METHOD_GET; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO); + mad->sa_hdr.comp_mask = 0; + *sa_query = &query->sa_query; + + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); + free_mad(&query->sa_query); + +err1: + kfree(query); + return ret; +} +EXPORT_SYMBOL(ib_sa_classport_info_rec_query); + +static void send_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc) +{ + struct ib_sa_query *query = mad_send_wc->send_buf->context[0]; + unsigned long flags; + + if (query->callback) + switch (mad_send_wc->status) { + case IB_WC_SUCCESS: + /* No callback -- already got recv */ + break; + case IB_WC_RESP_TIMEOUT_ERR: + query->callback(query, -ETIMEDOUT, NULL); + break; + case IB_WC_WR_FLUSH_ERR: + query->callback(query, -EINTR, NULL); + break; + default: + query->callback(query, -EIO, NULL); + break; + } + + spin_lock_irqsave(&idr_lock, flags); + idr_remove(&query_idr, query->id); + spin_unlock_irqrestore(&idr_lock, flags); + + free_mad(query); + ib_sa_client_put(query->client); + query->release(query); +} + +static void recv_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_sa_query *query; + + if (!send_buf) + return; + + query = send_buf->context[0]; + if (query->callback) { + if (mad_recv_wc->wc->status == IB_WC_SUCCESS) + query->callback(query, + mad_recv_wc->recv_buf.mad->mad_hdr.status ? + -EINVAL : 0, + (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad); + else + query->callback(query, -EIO, NULL); + } + + ib_free_recv_mad(mad_recv_wc); +} + +static void ib_sa_add_one(struct ib_device *device) +{ + struct ib_sa_device *sa_dev; + int s, e, i; + int count = 0; + + s = rdma_start_port(device); + e = rdma_end_port(device); + + sa_dev = kzalloc(sizeof *sa_dev + + (e - s + 1) * sizeof (struct ib_sa_port), + GFP_KERNEL); + if (!sa_dev) + return; + + sa_dev->start_port = s; + sa_dev->end_port = e; + + for (i = 0; i <= e - s; ++i) { + spin_lock_init(&sa_dev->port[i].ah_lock); + if (!rdma_cap_ib_sa(device, i + 1)) + continue; + + sa_dev->port[i].sm_ah = NULL; + sa_dev->port[i].port_num = i + s; + + spin_lock_init(&sa_dev->port[i].classport_lock); + sa_dev->port[i].classport_info.valid = false; + + sa_dev->port[i].agent = + ib_register_mad_agent(device, i + s, IB_QPT_GSI, + NULL, 0, send_handler, + recv_handler, sa_dev, 0); + if (IS_ERR(sa_dev->port[i].agent)) + goto err; + + INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); + + count++; + } + + if (!count) + goto free; + + ib_set_client_data(device, &sa_client, sa_dev); + + /* + * We register our event handler after everything is set up, + * and then update our cached info after the event handler is + * registered to avoid any problems if a port changes state + * during our initialization. 
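+ * Worst case, a port event that races with the initial
+ * update_sm_ah() below only triggers a redundant refresh; it can
+ * never be missed outright.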
+ */ + + INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event); + if (ib_register_event_handler(&sa_dev->event_handler)) + goto err; + + for (i = 0; i <= e - s; ++i) { + if (rdma_cap_ib_sa(device, i + 1)) + update_sm_ah(&sa_dev->port[i].update_task); + } + + return; + +err: + while (--i >= 0) { + if (rdma_cap_ib_sa(device, i + 1)) + ib_unregister_mad_agent(sa_dev->port[i].agent); + } +free: + kfree(sa_dev); + return; +} + +static void ib_sa_remove_one(struct ib_device *device, void *client_data) +{ + struct ib_sa_device *sa_dev = client_data; + int i; + + if (!sa_dev) + return; + + ib_unregister_event_handler(&sa_dev->event_handler); + + flush_workqueue(ib_wq); + + for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { + if (rdma_cap_ib_sa(device, i + 1)) { + ib_unregister_mad_agent(sa_dev->port[i].agent); + if (sa_dev->port[i].sm_ah) + kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); + } + + } + + kfree(sa_dev); +} + +int ib_sa_init(void) +{ + int ret; + + get_random_bytes(&tid, sizeof tid); + + ret = ib_register_client(&sa_client); + if (ret) { + pr_err("Couldn't register ib_sa client\n"); + goto err1; + } + + ret = mcast_init(); + if (ret) { + pr_err("Couldn't initialize multicast handling\n"); + goto err2; + } + + return 0; + +err2: + ib_unregister_client(&sa_client); +err1: + return ret; +} + +void ib_sa_cleanup(void) +{ + mcast_cleanup(); + ib_unregister_client(&sa_client); + idr_destroy(&query_idr); +} Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_sa_query.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucm.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucm.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucm.c (revision 320592) @@ -0,0 +1,1371 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +MODULE_AUTHOR("Libor Michalek"); +MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); +MODULE_LICENSE("Dual BSD/GPL"); + +struct ib_ucm_device { + int devnum; + struct cdev cdev; + struct device dev; + struct ib_device *ib_dev; +}; + +struct ib_ucm_file { + struct mutex file_mutex; + struct file *filp; + struct ib_ucm_device *device; + + struct list_head ctxs; + struct list_head events; + wait_queue_head_t poll_wait; +}; + +struct ib_ucm_context { + int id; + struct completion comp; + atomic_t ref; + int events_reported; + + struct ib_ucm_file *file; + struct ib_cm_id *cm_id; + __u64 uid; + + struct list_head events; /* list of pending events. */ + struct list_head file_list; /* member in file ctx list */ +}; + +struct ib_ucm_event { + struct ib_ucm_context *ctx; + struct list_head file_list; /* member in file event list */ + struct list_head ctx_list; /* member in ctx event list */ + + struct ib_cm_id *cm_id; + struct ib_ucm_event_resp resp; + void *data; + void *info; + int data_len; + int info_len; +}; + +enum { + IB_UCM_MAJOR = 231, + IB_UCM_BASE_MINOR = 224, + IB_UCM_MAX_DEVICES = 32 +}; + +#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) + +static void ib_ucm_add_one(struct ib_device *device); +static void ib_ucm_remove_one(struct ib_device *device, void *client_data); + +static struct ib_client ucm_client = { + .name = "ucm", + .add = ib_ucm_add_one, + .remove = ib_ucm_remove_one +}; + +static DEFINE_MUTEX(ctx_id_mutex); +static DEFINE_IDR(ctx_id_table); +static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); + +static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) +{ + struct ib_ucm_context *ctx; + + mutex_lock(&ctx_id_mutex); + ctx = idr_find(&ctx_id_table, id); + if (!ctx) + ctx = ERR_PTR(-ENOENT); + else if (ctx->file != file) + ctx = ERR_PTR(-EINVAL); + else + atomic_inc(&ctx->ref); + mutex_unlock(&ctx_id_mutex); + + return ctx; +} + +static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) +{ + if (atomic_dec_and_test(&ctx->ref)) + complete(&ctx->comp); +} + +static inline int ib_ucm_new_cm_id(int event) +{ + return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED; +} + +static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) +{ + struct ib_ucm_event *uevent; + + mutex_lock(&ctx->file->file_mutex); + list_del(&ctx->file_list); + while (!list_empty(&ctx->events)) { + + uevent = list_entry(ctx->events.next, + struct ib_ucm_event, ctx_list); + list_del(&uevent->file_list); + list_del(&uevent->ctx_list); + mutex_unlock(&ctx->file->file_mutex); + + /* clear incoming connections. 
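+ * For REQ/SIDR_REQ events the kernel allocated a fresh cm_id that
+ * userspace never retrieved, so destroy it here to avoid leaking it.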
*/ + if (ib_ucm_new_cm_id(uevent->resp.event)) + ib_destroy_cm_id(uevent->cm_id); + + kfree(uevent); + mutex_lock(&ctx->file->file_mutex); + } + mutex_unlock(&ctx->file->file_mutex); +} + +static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) +{ + struct ib_ucm_context *ctx; + + ctx = kzalloc(sizeof *ctx, GFP_KERNEL); + if (!ctx) + return NULL; + + atomic_set(&ctx->ref, 1); + init_completion(&ctx->comp); + ctx->file = file; + INIT_LIST_HEAD(&ctx->events); + + mutex_lock(&ctx_id_mutex); + ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL); + mutex_unlock(&ctx_id_mutex); + if (ctx->id < 0) + goto error; + + list_add_tail(&ctx->file_list, &file->ctxs); + return ctx; + +error: + kfree(ctx); + return NULL; +} + +static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, + struct ib_cm_req_event_param *kreq) +{ + ureq->remote_ca_guid = kreq->remote_ca_guid; + ureq->remote_qkey = kreq->remote_qkey; + ureq->remote_qpn = kreq->remote_qpn; + ureq->qp_type = kreq->qp_type; + ureq->starting_psn = kreq->starting_psn; + ureq->responder_resources = kreq->responder_resources; + ureq->initiator_depth = kreq->initiator_depth; + ureq->local_cm_response_timeout = kreq->local_cm_response_timeout; + ureq->flow_control = kreq->flow_control; + ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout; + ureq->retry_count = kreq->retry_count; + ureq->rnr_retry_count = kreq->rnr_retry_count; + ureq->srq = kreq->srq; + ureq->port = kreq->port; + + ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path); + if (kreq->alternate_path) + ib_copy_path_rec_to_user(&ureq->alternate_path, + kreq->alternate_path); +} + +static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, + struct ib_cm_rep_event_param *krep) +{ + urep->remote_ca_guid = krep->remote_ca_guid; + urep->remote_qkey = krep->remote_qkey; + urep->remote_qpn = krep->remote_qpn; + urep->starting_psn = krep->starting_psn; + urep->responder_resources = krep->responder_resources; + urep->initiator_depth = krep->initiator_depth; + urep->target_ack_delay = krep->target_ack_delay; + urep->failover_accepted = krep->failover_accepted; + urep->flow_control = krep->flow_control; + urep->rnr_retry_count = krep->rnr_retry_count; + urep->srq = krep->srq; +} + +static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, + struct ib_cm_sidr_rep_event_param *krep) +{ + urep->status = krep->status; + urep->qkey = krep->qkey; + urep->qpn = krep->qpn; +}; + +static int ib_ucm_event_process(struct ib_cm_event *evt, + struct ib_ucm_event *uvt) +{ + void *info = NULL; + + switch (evt->event) { + case IB_CM_REQ_RECEIVED: + ib_ucm_event_req_get(&uvt->resp.u.req_resp, + &evt->param.req_rcvd); + uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE; + uvt->resp.present = IB_UCM_PRES_PRIMARY; + uvt->resp.present |= (evt->param.req_rcvd.alternate_path ? 
+ IB_UCM_PRES_ALTERNATE : 0); + break; + case IB_CM_REP_RECEIVED: + ib_ucm_event_rep_get(&uvt->resp.u.rep_resp, + &evt->param.rep_rcvd); + uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE; + break; + case IB_CM_RTU_RECEIVED: + uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE; + uvt->resp.u.send_status = evt->param.send_status; + break; + case IB_CM_DREQ_RECEIVED: + uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE; + uvt->resp.u.send_status = evt->param.send_status; + break; + case IB_CM_DREP_RECEIVED: + uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE; + uvt->resp.u.send_status = evt->param.send_status; + break; + case IB_CM_MRA_RECEIVED: + uvt->resp.u.mra_resp.timeout = + evt->param.mra_rcvd.service_timeout; + uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE; + break; + case IB_CM_REJ_RECEIVED: + uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason; + uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; + uvt->info_len = evt->param.rej_rcvd.ari_length; + info = evt->param.rej_rcvd.ari; + break; + case IB_CM_LAP_RECEIVED: + ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path, + evt->param.lap_rcvd.alternate_path); + uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; + uvt->resp.present = IB_UCM_PRES_ALTERNATE; + break; + case IB_CM_APR_RECEIVED: + uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status; + uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE; + uvt->info_len = evt->param.apr_rcvd.info_len; + info = evt->param.apr_rcvd.apr_info; + break; + case IB_CM_SIDR_REQ_RECEIVED: + uvt->resp.u.sidr_req_resp.pkey = + evt->param.sidr_req_rcvd.pkey; + uvt->resp.u.sidr_req_resp.port = + evt->param.sidr_req_rcvd.port; + uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; + break; + case IB_CM_SIDR_REP_RECEIVED: + ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp, + &evt->param.sidr_rep_rcvd); + uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; + uvt->info_len = evt->param.sidr_rep_rcvd.info_len; + info = evt->param.sidr_rep_rcvd.info; + break; + default: + uvt->resp.u.send_status = evt->param.send_status; + break; + } + + if (uvt->data_len) { + uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL); + if (!uvt->data) + goto err1; + + uvt->resp.present |= IB_UCM_PRES_DATA; + } + + if (uvt->info_len) { + uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL); + if (!uvt->info) + goto err2; + + uvt->resp.present |= IB_UCM_PRES_INFO; + } + return 0; + +err2: + kfree(uvt->data); +err1: + return -ENOMEM; +} + +static int ib_ucm_event_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *event) +{ + struct ib_ucm_event *uevent; + struct ib_ucm_context *ctx; + int result = 0; + + ctx = cm_id->context; + + uevent = kzalloc(sizeof *uevent, GFP_KERNEL); + if (!uevent) + goto err1; + + uevent->ctx = ctx; + uevent->cm_id = cm_id; + uevent->resp.uid = ctx->uid; + uevent->resp.id = ctx->id; + uevent->resp.event = event->event; + + result = ib_ucm_event_process(event, uevent); + if (result) + goto err2; + + mutex_lock(&ctx->file->file_mutex); + list_add_tail(&uevent->file_list, &ctx->file->events); + list_add_tail(&uevent->ctx_list, &ctx->events); + wake_up_interruptible(&ctx->file->poll_wait); + linux_poll_wakeup(ctx->file->filp); + mutex_unlock(&ctx->file->file_mutex); + return 0; + +err2: + kfree(uevent); +err1: + /* Destroy new cm_id's */ + return ib_ucm_new_cm_id(event->event); +} + +static ssize_t ib_ucm_event(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_context *ctx; + struct ib_ucm_event_get cmd; + struct ib_ucm_event *uevent; + int result = 0; + + if (out_len < 
sizeof(struct ib_ucm_event_resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&file->file_mutex); + while (list_empty(&file->events)) { + mutex_unlock(&file->file_mutex); + + if (file->filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(file->poll_wait, + !list_empty(&file->events))) + return -ERESTARTSYS; + + mutex_lock(&file->file_mutex); + } + + uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); + + if (ib_ucm_new_cm_id(uevent->resp.event)) { + ctx = ib_ucm_ctx_alloc(file); + if (!ctx) { + result = -ENOMEM; + goto done; + } + + ctx->cm_id = uevent->cm_id; + ctx->cm_id->context = ctx; + uevent->resp.id = ctx->id; + } + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &uevent->resp, sizeof(uevent->resp))) { + result = -EFAULT; + goto done; + } + + if (uevent->data) { + if (cmd.data_len < uevent->data_len) { + result = -ENOMEM; + goto done; + } + if (copy_to_user((void __user *)(unsigned long)cmd.data, + uevent->data, uevent->data_len)) { + result = -EFAULT; + goto done; + } + } + + if (uevent->info) { + if (cmd.info_len < uevent->info_len) { + result = -ENOMEM; + goto done; + } + if (copy_to_user((void __user *)(unsigned long)cmd.info, + uevent->info, uevent->info_len)) { + result = -EFAULT; + goto done; + } + } + + list_del(&uevent->file_list); + list_del(&uevent->ctx_list); + uevent->ctx->events_reported++; + + kfree(uevent->data); + kfree(uevent->info); + kfree(uevent); +done: + mutex_unlock(&file->file_mutex); + return result; +} + +static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_create_id cmd; + struct ib_ucm_create_id_resp resp; + struct ib_ucm_context *ctx; + int result; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&file->file_mutex); + ctx = ib_ucm_ctx_alloc(file); + mutex_unlock(&file->file_mutex); + if (!ctx) + return -ENOMEM; + + ctx->uid = cmd.uid; + ctx->cm_id = ib_create_cm_id(file->device->ib_dev, + ib_ucm_event_handler, ctx); + if (IS_ERR(ctx->cm_id)) { + result = PTR_ERR(ctx->cm_id); + goto err1; + } + + resp.id = ctx->id; + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) { + result = -EFAULT; + goto err2; + } + return 0; + +err2: + ib_destroy_cm_id(ctx->cm_id); +err1: + mutex_lock(&ctx_id_mutex); + idr_remove(&ctx_id_table, ctx->id); + mutex_unlock(&ctx_id_mutex); + kfree(ctx); + return result; +} + +static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_destroy_id cmd; + struct ib_ucm_destroy_id_resp resp; + struct ib_ucm_context *ctx; + int result = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&ctx_id_mutex); + ctx = idr_find(&ctx_id_table, cmd.id); + if (!ctx) + ctx = ERR_PTR(-ENOENT); + else if (ctx->file != file) + ctx = ERR_PTR(-EINVAL); + else + idr_remove(&ctx_id_table, ctx->id); + mutex_unlock(&ctx_id_mutex); + + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ib_ucm_ctx_put(ctx); + wait_for_completion(&ctx->comp); + + /* No new events will be generated after destroying the cm_id. */ + ib_destroy_cm_id(ctx->cm_id); + /* Cleanup events not yet reported to the user. 
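+ * (ib_ucm_cleanup_events() also destroys the cm_ids of connect
+ * requests that were queued but never read by userspace.)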
*/ + ib_ucm_cleanup_events(ctx); + + resp.events_reported = ctx->events_reported; + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + result = -EFAULT; + + kfree(ctx); + return result; +} + +static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_attr_id_resp resp; + struct ib_ucm_attr_id cmd; + struct ib_ucm_context *ctx; + int result = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + resp.service_id = ctx->cm_id->service_id; + resp.service_mask = ctx->cm_id->service_mask; + resp.local_id = ctx->cm_id->local_id; + resp.remote_id = ctx->cm_id->remote_id; + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + result = -EFAULT; + + ib_ucm_ctx_put(ctx); + return result; +} + +static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_uverbs_qp_attr resp; + struct ib_ucm_init_qp_attr cmd; + struct ib_ucm_context *ctx; + struct ib_qp_attr qp_attr; + int result = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + resp.qp_attr_mask = 0; + memset(&qp_attr, 0, sizeof qp_attr); + qp_attr.qp_state = cmd.qp_state; + result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); + if (result) + goto out; + + ib_copy_qp_attr_to_user(&resp, &qp_attr); + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + result = -EFAULT; + +out: + ib_ucm_ctx_put(ctx); + return result; +} + +static int ucm_validate_listen(__be64 service_id, __be64 service_mask) +{ + service_id &= service_mask; + + if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) || + ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID)) + return -EINVAL; + + return 0; +} + +static ssize_t ib_ucm_listen(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_listen cmd; + struct ib_ucm_context *ctx; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + result = ucm_validate_listen(cmd.service_id, cmd.service_mask); + if (result) + goto out; + + result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask); +out: + ib_ucm_ctx_put(ctx); + return result; +} + +static ssize_t ib_ucm_notify(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_notify cmd; + struct ib_ucm_context *ctx; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event); + ib_ucm_ctx_put(ctx); + return result; +} + +static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) +{ + void *data; + + *dest = NULL; + + if (!len) + return 0; + + data = memdup_user((void __user *)(unsigned long)src, len); + if (IS_ERR(data)) + return PTR_ERR(data); + + *dest = data; + return 0; +} + +static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) +{ + struct ib_user_path_rec upath; + struct ib_sa_path_rec *sa_path; + + 
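ib_ucm_destroy_id() above is one instance of a reference-counting pattern used throughout these files (ib_ucm_ctx_get()/ib_ucm_ctx_put() here, ucma_get_ctx()/ucma_put_ctx() in ib_ucma.c below): lookups take a reference, the final put fires a completion, and the destroyer drops the creator's reference and then sleeps until all concurrent users have drained. Stripped to its bones, with my_obj as a purely hypothetical stand-in type, the idiom is:

#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/slab.h>

struct my_obj {
	atomic_t ref;			/* starts at 1: the creator's reference */
	struct completion comp;		/* fired by the final put */
};

static void my_obj_init(struct my_obj *obj)
{
	atomic_set(&obj->ref, 1);
	init_completion(&obj->comp);
}

/* Lookups take a reference under a lock via atomic_inc(&obj->ref). */
static void my_obj_put(struct my_obj *obj)
{
	if (atomic_dec_and_test(&obj->ref))
		complete(&obj->comp);
}

static void my_obj_destroy(struct my_obj *obj)
{
	my_obj_put(obj);			/* drop the creator's reference */
	wait_for_completion(&obj->comp);	/* wait out in-flight users */
	kfree(obj);				/* now nobody can touch it */
}

The ordering that makes this safe, visible in ib_ucm_destroy_id(), is that the object is unpublished first (idr_remove() under ctx_id_mutex), so no new lookup can take a reference while the destroyer sleeps in wait_for_completion().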
*path = NULL; + + if (!src) + return 0; + + sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL); + if (!sa_path) + return -ENOMEM; + + if (copy_from_user(&upath, (void __user *)(unsigned long)src, + sizeof(upath))) { + + kfree(sa_path); + return -EFAULT; + } + + ib_copy_path_rec_from_user(sa_path, &upath); + *path = sa_path; + return 0; +} + +static ssize_t ib_ucm_send_req(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_cm_req_param param; + struct ib_ucm_context *ctx; + struct ib_ucm_req cmd; + int result; + + param.private_data = NULL; + param.primary_path = NULL; + param.alternate_path = NULL; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); + if (result) + goto done; + + result = ib_ucm_path_get(¶m.primary_path, cmd.primary_path); + if (result) + goto done; + + result = ib_ucm_path_get(¶m.alternate_path, cmd.alternate_path); + if (result) + goto done; + + param.private_data_len = cmd.len; + param.service_id = cmd.sid; + param.qp_num = cmd.qpn; + param.qp_type = cmd.qp_type; + param.starting_psn = cmd.psn; + param.peer_to_peer = cmd.peer_to_peer; + param.responder_resources = cmd.responder_resources; + param.initiator_depth = cmd.initiator_depth; + param.remote_cm_response_timeout = cmd.remote_cm_response_timeout; + param.flow_control = cmd.flow_control; + param.local_cm_response_timeout = cmd.local_cm_response_timeout; + param.retry_count = cmd.retry_count; + param.rnr_retry_count = cmd.rnr_retry_count; + param.max_cm_retries = cmd.max_cm_retries; + param.srq = cmd.srq; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = ib_send_cm_req(ctx->cm_id, ¶m); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + +done: + kfree(param.private_data); + kfree(param.primary_path); + kfree(param.alternate_path); + return result; +} + +static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_cm_rep_param param; + struct ib_ucm_context *ctx; + struct ib_ucm_rep cmd; + int result; + + param.private_data = NULL; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); + if (result) + return result; + + param.qp_num = cmd.qpn; + param.starting_psn = cmd.psn; + param.private_data_len = cmd.len; + param.responder_resources = cmd.responder_resources; + param.initiator_depth = cmd.initiator_depth; + param.failover_accepted = cmd.failover_accepted; + param.flow_control = cmd.flow_control; + param.rnr_retry_count = cmd.rnr_retry_count; + param.srq = cmd.srq; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + ctx->uid = cmd.uid; + result = ib_send_cm_rep(ctx->cm_id, ¶m); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + + kfree(param.private_data); + return result; +} + +static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file, + const char __user *inbuf, int in_len, + int (*func)(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len)) +{ + struct ib_ucm_private_data cmd; + struct ib_ucm_context *ctx; + const void *private_data = NULL; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len); + if (result) + return result; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = func(ctx->cm_id, private_data, cmd.len); + ib_ucm_ctx_put(ctx); + } else + 
result = PTR_ERR(ctx); + + kfree(private_data); + return result; +} + +static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu); +} + +static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq); +} + +static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep); +} + +static ssize_t ib_ucm_send_info(struct ib_ucm_file *file, + const char __user *inbuf, int in_len, + int (*func)(struct ib_cm_id *cm_id, + int status, + const void *info, + u8 info_len, + const void *data, + u8 data_len)) +{ + struct ib_ucm_context *ctx; + struct ib_ucm_info cmd; + const void *data = NULL; + const void *info = NULL; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len); + if (result) + goto done; + + result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len); + if (result) + goto done; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = func(ctx->cm_id, cmd.status, info, cmd.info_len, + data, cmd.data_len); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + +done: + kfree(data); + kfree(info); + return result; +} + +static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej); +} + +static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr); +} + +static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_context *ctx; + struct ib_ucm_mra cmd; + const void *data = NULL; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); + if (result) + return result; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + + kfree(data); + return result; +} + +static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_ucm_context *ctx; + struct ib_sa_path_rec *path = NULL; + struct ib_ucm_lap cmd; + const void *data = NULL; + int result; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); + if (result) + goto done; + + result = ib_ucm_path_get(&path, cmd.path); + if (result) + goto done; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + +done: + kfree(data); + kfree(path); + return result; +} + +static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_cm_sidr_req_param param; + struct ib_ucm_context *ctx; + struct ib_ucm_sidr_req cmd; + int result; + + param.private_data = NULL; + param.path = NULL; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return 
-EFAULT; + + result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); + if (result) + goto done; + + result = ib_ucm_path_get(¶m.path, cmd.path); + if (result) + goto done; + + param.private_data_len = cmd.len; + param.service_id = cmd.sid; + param.timeout_ms = cmd.timeout; + param.max_cm_retries = cmd.max_cm_retries; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = ib_send_cm_sidr_req(ctx->cm_id, ¶m); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + +done: + kfree(param.private_data); + kfree(param.path); + return result; +} + +static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct ib_cm_sidr_rep_param param; + struct ib_ucm_sidr_rep cmd; + struct ib_ucm_context *ctx; + int result; + + param.info = NULL; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + result = ib_ucm_alloc_data(¶m.private_data, + cmd.data, cmd.data_len); + if (result) + goto done; + + result = ib_ucm_alloc_data(¶m.info, cmd.info, cmd.info_len); + if (result) + goto done; + + param.qp_num = cmd.qpn; + param.qkey = cmd.qkey; + param.status = cmd.status; + param.info_length = cmd.info_len; + param.private_data_len = cmd.data_len; + + ctx = ib_ucm_ctx_get(file, cmd.id); + if (!IS_ERR(ctx)) { + result = ib_send_cm_sidr_rep(ctx->cm_id, ¶m); + ib_ucm_ctx_put(ctx); + } else + result = PTR_ERR(ctx); + +done: + kfree(param.private_data); + kfree(param.info); + return result; +} + +static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, + const char __user *inbuf, + int in_len, int out_len) = { + [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id, + [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id, + [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id, + [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen, + [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify, + [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req, + [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep, + [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu, + [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq, + [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep, + [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej, + [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra, + [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap, + [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr, + [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req, + [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep, + [IB_USER_CM_CMD_EVENT] = ib_ucm_event, + [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr, +}; + +static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, + size_t len, loff_t *pos) +{ + struct ib_ucm_file *file = filp->private_data; + struct ib_ucm_cmd_hdr hdr; + ssize_t result; + + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + + if (len < sizeof(hdr)) + return -EINVAL; + + if (copy_from_user(&hdr, buf, sizeof(hdr))) + return -EFAULT; + + if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) + return -EINVAL; + + if (hdr.in + sizeof(hdr) > len) + return -EINVAL; + + result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr), + hdr.in, hdr.out); + if (!result) + result = len; + + return result; +} + +static unsigned int ib_ucm_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct ib_ucm_file *file = filp->private_data; + unsigned int mask = 0; + + poll_wait(filp, &file->poll_wait, wait); + + if (!list_empty(&file->events)) + mask = POLLIN | POLLRDNORM; + + return mask; +} + +/* + * ib_ucm_open() does not need the BKL: + * + * - no global state is referred to; + * - there is no ioctl method to race against; 
+ * - no further module initialization is required for open to work + * after the device is registered. + */ +static int ib_ucm_open(struct inode *inode, struct file *filp) +{ + struct ib_ucm_file *file; + + file = kmalloc(sizeof(*file), GFP_KERNEL); + if (!file) + return -ENOMEM; + + INIT_LIST_HEAD(&file->events); + INIT_LIST_HEAD(&file->ctxs); + init_waitqueue_head(&file->poll_wait); + + mutex_init(&file->file_mutex); + + filp->private_data = file; + file->filp = filp; + file->device = container_of(inode->i_cdev->si_drv1, struct ib_ucm_device, cdev); + + return nonseekable_open(inode, filp); +} + +static int ib_ucm_close(struct inode *inode, struct file *filp) +{ + struct ib_ucm_file *file = filp->private_data; + struct ib_ucm_context *ctx; + + mutex_lock(&file->file_mutex); + while (!list_empty(&file->ctxs)) { + ctx = list_entry(file->ctxs.next, + struct ib_ucm_context, file_list); + mutex_unlock(&file->file_mutex); + + mutex_lock(&ctx_id_mutex); + idr_remove(&ctx_id_table, ctx->id); + mutex_unlock(&ctx_id_mutex); + + ib_destroy_cm_id(ctx->cm_id); + ib_ucm_cleanup_events(ctx); + kfree(ctx); + + mutex_lock(&file->file_mutex); + } + mutex_unlock(&file->file_mutex); + kfree(file); + return 0; +} + +static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); +static void ib_ucm_release_dev(struct device *dev) +{ + struct ib_ucm_device *ucm_dev; + + ucm_dev = container_of(dev, struct ib_ucm_device, dev); + cdev_del(&ucm_dev->cdev); + if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) + clear_bit(ucm_dev->devnum, dev_map); + else + clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map); + kfree(ucm_dev); +} + +static const struct file_operations ucm_fops = { + .owner = THIS_MODULE, + .open = ib_ucm_open, + .release = ib_ucm_close, + .write = ib_ucm_write, + .poll = ib_ucm_poll, + .llseek = no_llseek, +}; + +static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ib_ucm_device *ucm_dev; + + ucm_dev = container_of(dev, struct ib_ucm_device, dev); + return sprintf(buf, "%s\n", ucm_dev->ib_dev->name); +} +static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); + +static dev_t overflow_maj; +static int find_overflow_devnum(void) +{ + int ret; + + if (!overflow_maj) { + ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES, + "infiniband_cm"); + if (ret) { + pr_err("ucm: couldn't register dynamic device number\n"); + return ret; + } + } + + ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES); + if (ret >= IB_UCM_MAX_DEVICES) + return -1; + + return ret; +} + +static void ib_ucm_add_one(struct ib_device *device) +{ + int devnum; + dev_t base; + struct ib_ucm_device *ucm_dev; + + if (!device->alloc_ucontext || !rdma_cap_ib_cm(device, 1)) + return; + + ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); + if (!ucm_dev) + return; + + ucm_dev->ib_dev = device; + + devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); + if (devnum >= IB_UCM_MAX_DEVICES) { + devnum = find_overflow_devnum(); + if (devnum < 0) + goto err; + + ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES; + base = devnum + overflow_maj; + set_bit(devnum, overflow_map); + } else { + ucm_dev->devnum = devnum; + base = devnum + IB_UCM_BASE_DEV; + set_bit(devnum, dev_map); + } + + cdev_init(&ucm_dev->cdev, &ucm_fops); + ucm_dev->cdev.owner = THIS_MODULE; + kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); + if (cdev_add(&ucm_dev->cdev, base, 1)) + goto err; + + ucm_dev->dev.class = &cm_class; + ucm_dev->dev.parent = device->dma_device; + ucm_dev->dev.devt = 
ucm_dev->cdev.dev; + ucm_dev->dev.release = ib_ucm_release_dev; + dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum); + if (device_register(&ucm_dev->dev)) + goto err_cdev; + + if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev)) + goto err_dev; + + ib_set_client_data(device, &ucm_client, ucm_dev); + return; + +err_dev: + device_unregister(&ucm_dev->dev); +err_cdev: + cdev_del(&ucm_dev->cdev); + if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) + clear_bit(devnum, dev_map); + else + clear_bit(devnum, overflow_map); +err: + kfree(ucm_dev); + return; +} + +static void ib_ucm_remove_one(struct ib_device *device, void *client_data) +{ + struct ib_ucm_device *ucm_dev = client_data; + + if (!ucm_dev) + return; + + device_unregister(&ucm_dev->dev); +} + +static CLASS_ATTR_STRING(abi_version, S_IRUGO, + __stringify(IB_USER_CM_ABI_VERSION)); + +static int __init ib_ucm_init(void) +{ + int ret; + + ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, + "infiniband_cm"); + if (ret) { + pr_err("ucm: couldn't register device number\n"); + goto error1; + } + + ret = class_create_file(&cm_class, &class_attr_abi_version.attr); + if (ret) { + pr_err("ucm: couldn't create abi_version attribute\n"); + goto error2; + } + + ret = ib_register_client(&ucm_client); + if (ret) { + pr_err("ucm: couldn't register client\n"); + goto error3; + } + return 0; + +error3: + class_remove_file(&cm_class, &class_attr_abi_version.attr); +error2: + unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); +error1: + return ret; +} + +static void __exit ib_ucm_cleanup(void) +{ + ib_unregister_client(&ucm_client); + class_remove_file(&cm_class, &class_attr_abi_version.attr); + unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); + if (overflow_maj) + unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES); + idr_destroy(&ctx_id_table); +} + +module_init_order(ib_ucm_init, SI_ORDER_THIRD); +module_exit(ib_ucm_cleanup); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucm.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucma.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucma.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucma.c (revision 320592) @@ -0,0 +1,1755 @@ +/* + * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static unsigned int max_backlog = 1024;
+
+struct ucma_file {
+	struct mutex mut;
+	struct file *filp;
+	struct list_head ctx_list;
+	struct list_head event_list;
+	wait_queue_head_t poll_wait;
+	struct workqueue_struct *close_wq;
+};
+
+struct ucma_context {
+	int id;
+	struct completion comp;
+	atomic_t ref;
+	int events_reported;
+	int backlog;
+
+	struct ucma_file *file;
+	struct rdma_cm_id *cm_id;
+	u64 uid;
+
+	struct list_head list;
+	struct list_head mc_list;
+	/* mark that device is in process of destroying the internal HW
+	 * resources, protected by the global mut
+	 */
+	int closing;
+	/* sync between removal event and id destroy, protected by file mut */
+	int destroying;
+	struct work_struct close_work;
+};
+
+struct ucma_multicast {
+	struct ucma_context *ctx;
+	int id;
+	int events_reported;
+
+	u64 uid;
+	u8 join_state;
+	struct list_head list;
+	struct sockaddr_storage addr;
+};
+
+struct ucma_event {
+	struct ucma_context *ctx;
+	struct ucma_multicast *mc;
+	struct list_head list;
+	struct rdma_cm_id *cm_id;
+	struct rdma_ucm_event_resp resp;
+	struct work_struct close_work;
+};
+
+static DEFINE_MUTEX(mut);
+static DEFINE_IDR(ctx_idr);
+static DEFINE_IDR(multicast_idr);
+
+static inline struct ucma_context *_ucma_find_context(int id,
+						       struct ucma_file *file)
+{
+	struct ucma_context *ctx;
+
+	ctx = idr_find(&ctx_idr, id);
+	if (!ctx)
+		ctx = ERR_PTR(-ENOENT);
+	else if (ctx->file != file)
+		ctx = ERR_PTR(-EINVAL);
+	return ctx;
+}
+
+static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
+{
+	struct ucma_context *ctx;
+
+	mutex_lock(&mut);
+	ctx = _ucma_find_context(id, file);
+	if (!IS_ERR(ctx)) {
+		if (ctx->closing)
+			ctx = ERR_PTR(-EIO);
+		else
+			atomic_inc(&ctx->ref);
+	}
+	mutex_unlock(&mut);
+	return ctx;
+}
+
+static void ucma_put_ctx(struct ucma_context *ctx)
+{
+	if (atomic_dec_and_test(&ctx->ref))
+		complete(&ctx->comp);
+}
+
+static void ucma_close_event_id(struct work_struct *work)
+{
+	struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
+
+	rdma_destroy_id(uevent_close->cm_id);
+	kfree(uevent_close);
+}
+
+static void ucma_close_id(struct work_struct *work)
+{
+	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);
+
+	/* once all inflight tasks are finished, we close all underlying
+	 * resources. The context stays alive until its creator explicitly
+	 * destroys it.
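+	 * That explicit teardown is ucma_destroy_id(), which finishes in
+	 * ucma_free_ctx() further below.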
+ */ + ucma_put_ctx(ctx); + wait_for_completion(&ctx->comp); + /* No new events will be generated after destroying the id. */ + rdma_destroy_id(ctx->cm_id); +} + +static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) +{ + struct ucma_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + INIT_WORK(&ctx->close_work, ucma_close_id); + atomic_set(&ctx->ref, 1); + init_completion(&ctx->comp); + INIT_LIST_HEAD(&ctx->mc_list); + ctx->file = file; + + mutex_lock(&mut); + ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL); + mutex_unlock(&mut); + if (ctx->id < 0) + goto error; + + list_add_tail(&ctx->list, &file->ctx_list); + return ctx; + +error: + kfree(ctx); + return NULL; +} + +static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) +{ + struct ucma_multicast *mc; + + mc = kzalloc(sizeof(*mc), GFP_KERNEL); + if (!mc) + return NULL; + + mutex_lock(&mut); + mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL); + mutex_unlock(&mut); + if (mc->id < 0) + goto error; + + mc->ctx = ctx; + list_add_tail(&mc->list, &ctx->mc_list); + return mc; + +error: + kfree(mc); + return NULL; +} + +static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, + struct rdma_conn_param *src) +{ + if (src->private_data_len) + memcpy(dst->private_data, src->private_data, + src->private_data_len); + dst->private_data_len = src->private_data_len; + dst->responder_resources =src->responder_resources; + dst->initiator_depth = src->initiator_depth; + dst->flow_control = src->flow_control; + dst->retry_count = src->retry_count; + dst->rnr_retry_count = src->rnr_retry_count; + dst->srq = src->srq; + dst->qp_num = src->qp_num; +} + +static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst, + struct rdma_ud_param *src) +{ + if (src->private_data_len) + memcpy(dst->private_data, src->private_data, + src->private_data_len); + dst->private_data_len = src->private_data_len; + ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); + dst->qp_num = src->qp_num; + dst->qkey = src->qkey; +} + +static void ucma_set_event_context(struct ucma_context *ctx, + struct rdma_cm_event *event, + struct ucma_event *uevent) +{ + uevent->ctx = ctx; + switch (event->event) { + case RDMA_CM_EVENT_MULTICAST_JOIN: + case RDMA_CM_EVENT_MULTICAST_ERROR: + uevent->mc = __DECONST(struct ucma_multicast *, + event->param.ud.private_data); + uevent->resp.uid = uevent->mc->uid; + uevent->resp.id = uevent->mc->id; + break; + default: + uevent->resp.uid = ctx->uid; + uevent->resp.id = ctx->id; + break; + } +} + +/* Called with file->mut locked for the relevant context. */ +static void ucma_removal_event_handler(struct rdma_cm_id *cm_id) +{ + struct ucma_context *ctx = cm_id->context; + struct ucma_event *con_req_eve; + int event_found = 0; + + if (ctx->destroying) + return; + + /* only if context is pointing to cm_id that it owns it and can be + * queued to be closed, otherwise that cm_id is an inflight one that + * is part of that context event list pending to be detached and + * reattached to its new context as part of ucma_get_event, + * handled separately below. 
+ */ + if (ctx->cm_id == cm_id) { + mutex_lock(&mut); + ctx->closing = 1; + mutex_unlock(&mut); + queue_work(ctx->file->close_wq, &ctx->close_work); + return; + } + + list_for_each_entry(con_req_eve, &ctx->file->event_list, list) { + if (con_req_eve->cm_id == cm_id && + con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { + list_del(&con_req_eve->list); + INIT_WORK(&con_req_eve->close_work, ucma_close_event_id); + queue_work(ctx->file->close_wq, &con_req_eve->close_work); + event_found = 1; + break; + } + } + if (!event_found) + pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n"); +} + +static int ucma_event_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct ucma_event *uevent; + struct ucma_context *ctx = cm_id->context; + int ret = 0; + + uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); + if (!uevent) + return event->event == RDMA_CM_EVENT_CONNECT_REQUEST; + + mutex_lock(&ctx->file->mut); + uevent->cm_id = cm_id; + ucma_set_event_context(ctx, event, uevent); + uevent->resp.event = event->event; + uevent->resp.status = event->status; + if (cm_id->qp_type == IB_QPT_UD) + ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); + else + ucma_copy_conn_event(&uevent->resp.param.conn, + &event->param.conn); + + if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { + if (!ctx->backlog) { + ret = -ENOMEM; + kfree(uevent); + goto out; + } + ctx->backlog--; + } else if (!ctx->uid || ctx->cm_id != cm_id) { + /* + * We ignore events for new connections until userspace has set + * their context. This can only happen if an error occurs on a + * new connection before the user accepts it. This is okay, + * since the accept will just fail later. However, we do need + * to release the underlying HW resources in case of a device + * removal event. 
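+ * ucma_removal_event_handler() below queues that teardown.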
+ */ + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + ucma_removal_event_handler(cm_id); + + kfree(uevent); + goto out; + } + + list_add_tail(&uevent->list, &ctx->file->event_list); + wake_up_interruptible(&ctx->file->poll_wait); + linux_poll_wakeup(ctx->file->filp); + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + ucma_removal_event_handler(cm_id); +out: + mutex_unlock(&ctx->file->mut); + return ret; +} + +static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct ucma_context *ctx; + struct rdma_ucm_get_event cmd; + struct ucma_event *uevent; + int ret = 0; + + if (out_len < sizeof uevent->resp) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&file->mut); + while (list_empty(&file->event_list)) { + mutex_unlock(&file->mut); + + if (file->filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(file->poll_wait, + !list_empty(&file->event_list))) + return -ERESTARTSYS; + + mutex_lock(&file->mut); + } + + uevent = list_entry(file->event_list.next, struct ucma_event, list); + + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { + ctx = ucma_alloc_ctx(file); + if (!ctx) { + ret = -ENOMEM; + goto done; + } + uevent->ctx->backlog++; + ctx->cm_id = uevent->cm_id; + ctx->cm_id->context = ctx; + uevent->resp.id = ctx->id; + } + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &uevent->resp, sizeof uevent->resp)) { + ret = -EFAULT; + goto done; + } + + list_del(&uevent->list); + uevent->ctx->events_reported++; + if (uevent->mc) + uevent->mc->events_reported++; + kfree(uevent); +done: + mutex_unlock(&file->mut); + return ret; +} + +static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type) +{ + switch (cmd->ps) { + case RDMA_PS_TCP: + *qp_type = IB_QPT_RC; + return 0; + case RDMA_PS_UDP: + case RDMA_PS_IPOIB: + *qp_type = IB_QPT_UD; + return 0; + case RDMA_PS_IB: + *qp_type = cmd->qp_type; + return 0; + default: + return -EINVAL; + } +} + +static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_create_id cmd; + struct rdma_ucm_create_id_resp resp; + struct ucma_context *ctx; + enum ib_qp_type qp_type; + int ret; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ret = ucma_get_qp_type(&cmd, &qp_type); + if (ret) + return ret; + + mutex_lock(&file->mut); + ctx = ucma_alloc_ctx(file); + mutex_unlock(&file->mut); + if (!ctx) + return -ENOMEM; + + ctx->uid = cmd.uid; + ctx->cm_id = rdma_create_id(TD_TO_VNET(curthread), + ucma_event_handler, ctx, cmd.ps, qp_type); + if (IS_ERR(ctx->cm_id)) { + ret = PTR_ERR(ctx->cm_id); + goto err1; + } + + resp.id = ctx->id; + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) { + ret = -EFAULT; + goto err2; + } + return 0; + +err2: + rdma_destroy_id(ctx->cm_id); +err1: + mutex_lock(&mut); + idr_remove(&ctx_idr, ctx->id); + mutex_unlock(&mut); + kfree(ctx); + return ret; +} + +static void ucma_cleanup_multicast(struct ucma_context *ctx) +{ + struct ucma_multicast *mc, *tmp; + + mutex_lock(&mut); + list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { + list_del(&mc->list); + idr_remove(&multicast_idr, mc->id); + kfree(mc); + } + mutex_unlock(&mut); +} + +static void ucma_cleanup_mc_events(struct ucma_multicast *mc) +{ + struct ucma_event *uevent, *tmp; + + list_for_each_entry_safe(uevent, tmp, 
&mc->ctx->file->event_list, list) { + if (uevent->mc != mc) + continue; + + list_del(&uevent->list); + kfree(uevent); + } +} + +/* + * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At + * this point, no new events will be reported from the hardware. However, we + * still need to cleanup the UCMA context for this ID. Specifically, there + * might be events that have not yet been consumed by the user space software. + * These might include pending connect requests which we have not completed + * processing. We cannot call rdma_destroy_id while holding the lock of the + * context (file->mut), as it might cause a deadlock. We therefore extract all + * relevant events from the context pending events list while holding the + * mutex. After that we release them as needed. + */ +static int ucma_free_ctx(struct ucma_context *ctx) +{ + int events_reported; + struct ucma_event *uevent, *tmp; + LIST_HEAD(list); + + + ucma_cleanup_multicast(ctx); + + /* Cleanup events not yet reported to the user. */ + mutex_lock(&ctx->file->mut); + list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { + if (uevent->ctx == ctx) + list_move_tail(&uevent->list, &list); + } + list_del(&ctx->list); + mutex_unlock(&ctx->file->mut); + + list_for_each_entry_safe(uevent, tmp, &list, list) { + list_del(&uevent->list); + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) + rdma_destroy_id(uevent->cm_id); + kfree(uevent); + } + + events_reported = ctx->events_reported; + kfree(ctx); + return events_reported; +} + +static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_destroy_id cmd; + struct rdma_ucm_destroy_id_resp resp; + struct ucma_context *ctx; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&mut); + ctx = _ucma_find_context(cmd.id, file); + if (!IS_ERR(ctx)) + idr_remove(&ctx_idr, ctx->id); + mutex_unlock(&mut); + + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + mutex_lock(&ctx->file->mut); + ctx->destroying = 1; + mutex_unlock(&ctx->file->mut); + + flush_workqueue(ctx->file->close_wq); + /* At this point it's guaranteed that there is no inflight + * closing task */ + mutex_lock(&mut); + if (!ctx->closing) { + mutex_unlock(&mut); + ucma_put_ctx(ctx); + wait_for_completion(&ctx->comp); + rdma_destroy_id(ctx->cm_id); + } else { + mutex_unlock(&mut); + } + + resp.events_reported = ucma_free_ctx(ctx); + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + + return ret; +} + +static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_bind_ip cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_bind cmd; + struct sockaddr *addr; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + addr = (struct sockaddr *) &cmd.addr; + if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr))) + return -EINVAL; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + 
return PTR_ERR(ctx); + + ret = rdma_bind_addr(ctx->cm_id, addr); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_resolve_ip(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_resolve_ip cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, + (struct sockaddr *) &cmd.dst_addr, + cmd.timeout_ms); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_resolve_addr(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_resolve_addr cmd; + struct sockaddr *src, *dst; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + src = (struct sockaddr *) &cmd.src_addr; + dst = (struct sockaddr *) &cmd.dst_addr; + if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) || + !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst))) + return -EINVAL; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_resolve_route(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_resolve_route cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); + ucma_put_ctx(ctx); + return ret; +} + +static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, + struct rdma_route *route) +{ + struct rdma_dev_addr *dev_addr; + + resp->num_paths = route->num_paths; + switch (route->num_paths) { + case 0: + dev_addr = &route->addr.dev_addr; + rdma_addr_get_dgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].dgid); + rdma_addr_get_sgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].sgid); + resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); + break; + case 2: + ib_copy_path_rec_to_user(&resp->ib_route[1], + &route->path_rec[1]); + /* fall through */ + case 1: + ib_copy_path_rec_to_user(&resp->ib_route[0], + &route->path_rec[0]); + break; + default: + break; + } +} + +static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, + struct rdma_route *route) +{ + + resp->num_paths = route->num_paths; + switch (route->num_paths) { + case 0: + rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, + (union ib_gid *)&resp->ib_route[0].dgid); + rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, + (union ib_gid *)&resp->ib_route[0].sgid); + resp->ib_route[0].pkey = cpu_to_be16(0xffff); + break; + case 2: + ib_copy_path_rec_to_user(&resp->ib_route[1], + &route->path_rec[1]); + /* fall through */ + case 1: + ib_copy_path_rec_to_user(&resp->ib_route[0], + &route->path_rec[0]); + break; + default: + break; + } +} + +static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp, + struct rdma_route *route) +{ + struct rdma_dev_addr *dev_addr; + + dev_addr = &route->addr.dev_addr; + rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); + rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); +} + +static ssize_t ucma_query_route(struct ucma_file *file, + const char __user *inbuf, + 
int in_len, int out_len) +{ + struct rdma_ucm_query cmd; + struct rdma_ucm_query_route_resp resp; + struct ucma_context *ctx; + struct sockaddr *addr; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + memset(&resp, 0, sizeof resp); + addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; + memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? + sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6)); + addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; + memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ? + sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6)); + if (!ctx->cm_id->device) + goto out; + + resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; + resp.port_num = ctx->cm_id->port_num; + + if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) + ucma_copy_ib_route(&resp, &ctx->cm_id->route); + else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num)) + ucma_copy_iboe_route(&resp, &ctx->cm_id->route); + else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num)) + ucma_copy_iw_route(&resp, &ctx->cm_id->route); + +out: + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + + ucma_put_ctx(ctx); + return ret; +} + +static void ucma_query_device_addr(struct rdma_cm_id *cm_id, + struct rdma_ucm_query_addr_resp *resp) +{ + if (!cm_id->device) + return; + + resp->node_guid = (__force __u64) cm_id->device->node_guid; + resp->port_num = cm_id->port_num; + resp->pkey = (__force __u16) cpu_to_be16( + ib_addr_get_pkey(&cm_id->route.addr.dev_addr)); +} + +static ssize_t ucma_query_addr(struct ucma_context *ctx, + void __user *response, int out_len) +{ + struct rdma_ucm_query_addr_resp resp; + struct sockaddr *addr; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + memset(&resp, 0, sizeof resp); + + addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; + resp.src_size = rdma_addr_size(addr); + memcpy(&resp.src_addr, addr, resp.src_size); + + addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; + resp.dst_size = rdma_addr_size(addr); + memcpy(&resp.dst_addr, addr, resp.dst_size); + + ucma_query_device_addr(ctx->cm_id, &resp); + + if (copy_to_user(response, &resp, sizeof(resp))) + ret = -EFAULT; + + return ret; +} + +static ssize_t ucma_query_path(struct ucma_context *ctx, + void __user *response, int out_len) +{ + struct rdma_ucm_query_path_resp *resp; + int i, ret = 0; + + if (out_len < sizeof(*resp)) + return -ENOSPC; + + resp = kzalloc(out_len, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + resp->num_paths = ctx->cm_id->route.num_paths; + for (i = 0, out_len -= sizeof(*resp); + i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); + i++, out_len -= sizeof(struct ib_path_rec_data)) { + + resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | + IB_PATH_BIDIRECTIONAL; + ib_sa_pack_path(&ctx->cm_id->route.path_rec[i], + &resp->path_data[i].path_rec); + } + + if (copy_to_user(response, resp, + sizeof(*resp) + (i * sizeof(struct ib_path_rec_data)))) + ret = -EFAULT; + + kfree(resp); + return ret; +} + +static ssize_t ucma_query_gid(struct ucma_context *ctx, + void __user *response, int out_len) +{ + struct rdma_ucm_query_addr_resp resp; + struct sockaddr_ib *addr; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + memset(&resp, 0, sizeof 
resp); + + ucma_query_device_addr(ctx->cm_id, &resp); + + addr = (struct sockaddr_ib *) &resp.src_addr; + resp.src_size = sizeof(*addr); + if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) { + memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size); + } else { + addr->sib_family = AF_IB; + addr->sib_pkey = (__force __be16) resp.pkey; + rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr, + (union ib_gid *) &addr->sib_addr); + addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) + &ctx->cm_id->route.addr.src_addr); + } + + addr = (struct sockaddr_ib *) &resp.dst_addr; + resp.dst_size = sizeof(*addr); + if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) { + memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size); + } else { + addr->sib_family = AF_IB; + addr->sib_pkey = (__force __be16) resp.pkey; + rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr, + (union ib_gid *) &addr->sib_addr); + addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) + &ctx->cm_id->route.addr.dst_addr); + } + + if (copy_to_user(response, &resp, sizeof(resp))) + ret = -EFAULT; + + return ret; +} + +static ssize_t ucma_query(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_query cmd; + struct ucma_context *ctx; + void __user *response; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + response = (void __user *)(unsigned long) cmd.response; + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + switch (cmd.option) { + case RDMA_USER_CM_QUERY_ADDR: + ret = ucma_query_addr(ctx, response, out_len); + break; + case RDMA_USER_CM_QUERY_PATH: + ret = ucma_query_path(ctx, response, out_len); + break; + case RDMA_USER_CM_QUERY_GID: + ret = ucma_query_gid(ctx, response, out_len); + break; + default: + ret = -ENOSYS; + break; + } + + ucma_put_ctx(ctx); + return ret; +} + +static void ucma_copy_conn_param(struct rdma_cm_id *id, + struct rdma_conn_param *dst, + struct rdma_ucm_conn_param *src) +{ + dst->private_data = src->private_data; + dst->private_data_len = src->private_data_len; + dst->responder_resources =src->responder_resources; + dst->initiator_depth = src->initiator_depth; + dst->flow_control = src->flow_control; + dst->retry_count = src->retry_count; + dst->rnr_retry_count = src->rnr_retry_count; + dst->srq = src->srq; + dst->qp_num = src->qp_num; + dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0; +} + +static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_connect cmd; + struct rdma_conn_param conn_param; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + if (!cmd.conn_param.valid) + return -EINVAL; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); + ret = rdma_connect(ctx->cm_id, &conn_param); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_listen cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ? 
+ cmd.backlog : max_backlog; + ret = rdma_listen(ctx->cm_id, ctx->backlog); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_accept cmd; + struct rdma_conn_param conn_param; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + if (cmd.conn_param.valid) { + ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); + mutex_lock(&file->mut); + ret = rdma_accept(ctx->cm_id, &conn_param); + if (!ret) + ctx->uid = cmd.uid; + mutex_unlock(&file->mut); + } else + ret = rdma_accept(ctx->cm_id, NULL); + + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_reject cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_disconnect cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_disconnect(ctx->cm_id); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_init_qp_attr(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_init_qp_attr cmd; + struct ib_uverbs_qp_attr resp; + struct ucma_context *ctx; + struct ib_qp_attr qp_attr; + int ret; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + resp.qp_attr_mask = 0; + memset(&qp_attr, 0, sizeof qp_attr); + qp_attr.qp_state = cmd.qp_state; + ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); + if (ret) + goto out; + + ib_copy_qp_attr_to_user(&resp, &qp_attr); + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + +out: + ucma_put_ctx(ctx); + return ret; +} + +static int ucma_set_option_id(struct ucma_context *ctx, int optname, + void *optval, size_t optlen) +{ + int ret = 0; + + switch (optname) { + case RDMA_OPTION_ID_TOS: + if (optlen != sizeof(u8)) { + ret = -EINVAL; + break; + } + rdma_set_service_type(ctx->cm_id, *((u8 *) optval)); + break; + case RDMA_OPTION_ID_REUSEADDR: + if (optlen != sizeof(int)) { + ret = -EINVAL; + break; + } + ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); + break; + case RDMA_OPTION_ID_AFONLY: + if (optlen != sizeof(int)) { + ret = -EINVAL; + break; + } + ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 
1 : 0); + break; + default: + ret = -ENOSYS; + } + + return ret; +} + +static int ucma_set_ib_path(struct ucma_context *ctx, + struct ib_path_rec_data *path_data, size_t optlen) +{ + struct ib_sa_path_rec sa_path; + struct rdma_cm_event event; + int ret; + + if (optlen % sizeof(*path_data)) + return -EINVAL; + + for (; optlen; optlen -= sizeof(*path_data), path_data++) { + if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY | + IB_PATH_BIDIRECTIONAL)) + break; + } + + if (!optlen) + return -EINVAL; + + memset(&sa_path, 0, sizeof(sa_path)); + + ib_sa_unpack_path(path_data->path_rec, &sa_path); + ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); + if (ret) + return ret; + + memset(&event, 0, sizeof event); + event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + return ucma_event_handler(ctx->cm_id, &event); +} + +static int ucma_set_option_ib(struct ucma_context *ctx, int optname, + void *optval, size_t optlen) +{ + int ret; + + switch (optname) { + case RDMA_OPTION_IB_PATH: + ret = ucma_set_ib_path(ctx, optval, optlen); + break; + default: + ret = -ENOSYS; + } + + return ret; +} + +static int ucma_set_option_level(struct ucma_context *ctx, int level, + int optname, void *optval, size_t optlen) +{ + int ret; + + switch (level) { + case RDMA_OPTION_ID: + ret = ucma_set_option_id(ctx, optname, optval, optlen); + break; + case RDMA_OPTION_IB: + ret = ucma_set_option_ib(ctx, optname, optval, optlen); + break; + default: + ret = -ENOSYS; + } + + return ret; +} + +static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_set_option cmd; + struct ucma_context *ctx; + void *optval; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + optval = memdup_user((void __user *) (unsigned long) cmd.optval, + cmd.optlen); + if (IS_ERR(optval)) { + ret = PTR_ERR(optval); + goto out; + } + + ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, + cmd.optlen); + kfree(optval); + +out: + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_notify cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_process_join(struct ucma_file *file, + struct rdma_ucm_join_mcast *cmd, int out_len) +{ + struct rdma_ucm_create_id_resp resp; + struct ucma_context *ctx; + struct ucma_multicast *mc; + struct sockaddr *addr; + int ret; + u8 join_state; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + addr = (struct sockaddr *) &cmd->addr; + if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) + return -EINVAL; + + if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) + join_state = BIT(FULLMEMBER_JOIN); + else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) + join_state = BIT(SENDONLY_FULLMEMBER_JOIN); + else + return -EINVAL; + + ctx = ucma_get_ctx(file, cmd->id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + mutex_lock(&file->mut); + mc = ucma_alloc_multicast(ctx); + if (!mc) { + ret = -ENOMEM; + goto err1; + } + mc->join_state = join_state; + mc->uid = cmd->uid; + memcpy(&mc->addr, addr, cmd->addr_size); + ret = rdma_join_multicast(ctx->cm_id, 
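+ /* mc is the opaque context for this join; multicast events + * generated for it hand the pointer back, where it becomes + * uevent->mc */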
(struct sockaddr *)&mc->addr, + join_state, mc); + if (ret) + goto err2; + + resp.id = mc->id; + if (copy_to_user((void __user *)(unsigned long) cmd->response, + &resp, sizeof(resp))) { + ret = -EFAULT; + goto err3; + } + + mutex_unlock(&file->mut); + ucma_put_ctx(ctx); + return 0; + +err3: + rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr); + ucma_cleanup_mc_events(mc); +err2: + mutex_lock(&mut); + idr_remove(&multicast_idr, mc->id); + mutex_unlock(&mut); + list_del(&mc->list); + kfree(mc); +err1: + mutex_unlock(&file->mut); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_join_ip_multicast(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_join_ip_mcast cmd; + struct rdma_ucm_join_mcast join_cmd; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + join_cmd.response = cmd.response; + join_cmd.uid = cmd.uid; + join_cmd.id = cmd.id; + join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); + join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER; + memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); + + return ucma_process_join(file, &join_cmd, out_len); +} + +static ssize_t ucma_join_multicast(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_join_mcast cmd; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + return ucma_process_join(file, &cmd, out_len); +} + +static ssize_t ucma_leave_multicast(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_destroy_id cmd; + struct rdma_ucm_destroy_id_resp resp; + struct ucma_multicast *mc; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&mut); + mc = idr_find(&multicast_idr, cmd.id); + if (!mc) + mc = ERR_PTR(-ENOENT); + else if (mc->ctx->file != file) + mc = ERR_PTR(-EINVAL); + else if (!atomic_inc_not_zero(&mc->ctx->ref)) + mc = ERR_PTR(-ENXIO); + else + idr_remove(&multicast_idr, mc->id); + mutex_unlock(&mut); + + if (IS_ERR(mc)) { + ret = PTR_ERR(mc); + goto out; + } + + rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); + mutex_lock(&mc->ctx->file->mut); + ucma_cleanup_mc_events(mc); + list_del(&mc->list); + mutex_unlock(&mc->ctx->file->mut); + + ucma_put_ctx(mc->ctx); + resp.events_reported = mc->events_reported; + kfree(mc); + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; +out: + return ret; +} + +static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) +{ + /* Acquire mutexes based on pointer comparison to prevent deadlock.
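+ * + * Illustrative sketch: if one thread took file1->mut then file2->mut + * while another took them in the opposite order, the two could + * deadlock; ordering by address means both sides effectively do + * + * mutex_lock(&min(file1, file2)->mut); + * mutex_lock_nested(&max(file1, file2)->mut, SINGLE_DEPTH_NESTING); + * + * where mutex_lock_nested() merely tells the lock validator that + * taking two locks of the same class is intentional here.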
*/ + if (file1 < file2) { + mutex_lock(&file1->mut); + mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING); + } else { + mutex_lock(&file2->mut); + mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING); + } +} + +static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2) +{ + if (file1 < file2) { + mutex_unlock(&file2->mut); + mutex_unlock(&file1->mut); + } else { + mutex_unlock(&file1->mut); + mutex_unlock(&file2->mut); + } +} + +static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file) +{ + struct ucma_event *uevent, *tmp; + + list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) + if (uevent->ctx == ctx) + list_move_tail(&uevent->list, &file->event_list); +} + +static ssize_t ucma_migrate_id(struct ucma_file *new_file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_migrate_id cmd; + struct rdma_ucm_migrate_resp resp; + struct ucma_context *ctx; + struct fd f; + struct ucma_file *cur_file; + int ret = 0; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + /* Get current fd to protect against it being closed */ + f = fdget(cmd.fd); + if (!f.file) + return -ENOENT; + + /* Validate current fd and prevent destruction of id. */ + ctx = ucma_get_ctx(f.file->private_data, cmd.id); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto file_put; + } + + cur_file = ctx->file; + if (cur_file == new_file) { + resp.events_reported = ctx->events_reported; + goto response; + } + + /* + * Migrate events between fd's, maintaining order, and avoiding new + * events being added before existing events. + */ + ucma_lock_files(cur_file, new_file); + mutex_lock(&mut); + + list_move_tail(&ctx->list, &new_file->ctx_list); + ucma_move_events(ctx, new_file); + ctx->file = new_file; + resp.events_reported = ctx->events_reported; + + mutex_unlock(&mut); + ucma_unlock_files(cur_file, new_file); + +response: + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + + ucma_put_ctx(ctx); +file_put: + fdput(f); + return ret; +} + +static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) = { + [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, + [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, + [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip, + [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip, + [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route, + [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, + [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, + [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, + [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, + [RDMA_USER_CM_CMD_REJECT] = ucma_reject, + [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, + [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, + [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, + [RDMA_USER_CM_CMD_GET_OPTION] = NULL, + [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, + [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, + [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast, + [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, + [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id, + [RDMA_USER_CM_CMD_QUERY] = ucma_query, + [RDMA_USER_CM_CMD_BIND] = ucma_bind, + [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, + [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast +}; + +static ssize_t ucma_write(struct file *filp, const char __user *buf, + size_t len, loff_t *pos) +{ + struct ucma_file *file = filp->private_data; + struct rdma_ucm_cmd_hdr hdr; + ssize_t ret; + + if 
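+ /* ib_safe_file_access() verifies that the credentials of the write(2) + * caller match those the file was opened with, so commands cannot be + * issued from a different security context. */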
(WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + + if (len < sizeof(hdr)) + return -EINVAL; + + if (copy_from_user(&hdr, buf, sizeof(hdr))) + return -EFAULT; + + if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) + return -EINVAL; + + if (hdr.in + sizeof(hdr) > len) + return -EINVAL; + + if (!ucma_cmd_table[hdr.cmd]) + return -ENOSYS; + + ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); + if (!ret) + ret = len; + + return ret; +} + +static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait) +{ + struct ucma_file *file = filp->private_data; + unsigned int mask = 0; + + poll_wait(filp, &file->poll_wait, wait); + + if (!list_empty(&file->event_list)) + mask = POLLIN | POLLRDNORM; + + return mask; +} + +/* + * ucma_open() does not need the BKL: + * + * - no global state is referred to; + * - there is no ioctl method to race against; + * - no further module initialization is required for open to work + * after the device is registered. + */ +static int ucma_open(struct inode *inode, struct file *filp) +{ + struct ucma_file *file; + + file = kmalloc(sizeof *file, GFP_KERNEL); + if (!file) + return -ENOMEM; + + file->close_wq = alloc_ordered_workqueue("ucma_close_id", + WQ_MEM_RECLAIM); + if (!file->close_wq) { + kfree(file); + return -ENOMEM; + } + + INIT_LIST_HEAD(&file->event_list); + INIT_LIST_HEAD(&file->ctx_list); + init_waitqueue_head(&file->poll_wait); + mutex_init(&file->mut); + + filp->private_data = file; + file->filp = filp; + + return nonseekable_open(inode, filp); +} + +static int ucma_close(struct inode *inode, struct file *filp) +{ + struct ucma_file *file = filp->private_data; + struct ucma_context *ctx, *tmp; + + mutex_lock(&file->mut); + list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) { + ctx->destroying = 1; + mutex_unlock(&file->mut); + + mutex_lock(&mut); + idr_remove(&ctx_idr, ctx->id); + mutex_unlock(&mut); + + flush_workqueue(file->close_wq); + /* Once the context has been marked as destroying and the + * workqueue has been flushed, we are safe from any in-flight + * handlers that might queue another closing task. + */ + mutex_lock(&mut); + if (!ctx->closing) { + mutex_unlock(&mut); + /* rdma_destroy_id ensures that no event handlers are + * in-flight for that id before releasing it.
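+ * If ctx->closing was set instead, the handler running on close_wq + * owns the final rdma_destroy_id() call, so it is not repeated here.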
+ */ + rdma_destroy_id(ctx->cm_id); + } else { + mutex_unlock(&mut); + } + + ucma_free_ctx(ctx); + mutex_lock(&file->mut); + } + mutex_unlock(&file->mut); + destroy_workqueue(file->close_wq); + kfree(file); + return 0; +} + +static long +ucma_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + + switch (cmd) { + case FIONBIO: + case FIOASYNC: + return (0); + default: + return (-ENOTTY); + } +} + +static const struct file_operations ucma_fops = { + .owner = THIS_MODULE, + .open = ucma_open, + .release = ucma_close, + .write = ucma_write, + .unlocked_ioctl = ucma_ioctl, + .poll = ucma_poll, + .llseek = no_llseek, +}; + +static struct miscdevice ucma_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "rdma_cm", + .nodename = "infiniband/rdma_cm", + .mode = 0666, + .fops = &ucma_fops, +}; + +static ssize_t show_abi_version(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); +} +static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + +static int __init ucma_init(void) +{ + int ret; + + ret = misc_register(&ucma_misc); + if (ret) + return ret; + + ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); + if (ret) { + pr_err("rdma_ucm: couldn't create abi_version attr\n"); + goto err1; + } + + return 0; +err1: + misc_deregister(&ucma_misc); + return ret; +} + +static void __exit ucma_cleanup(void) +{ + device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); + misc_deregister(&ucma_misc); + idr_destroy(&ctx_idr); + idr_destroy(&multicast_idr); +} + +module_init(ucma_init); +module_exit(ucma_cleanup); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucma.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ud_header.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ud_header.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ud_header.c (revision 320592) @@ -0,0 +1,547 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include + +#include + +#define STRUCT_FIELD(header, field) \ + .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ + .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \ + .field_name = #header ":" #field + +static const struct ib_field lrh_table[] = { + { STRUCT_FIELD(lrh, virtual_lane), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 4 }, + { STRUCT_FIELD(lrh, link_version), + .offset_words = 0, + .offset_bits = 4, + .size_bits = 4 }, + { STRUCT_FIELD(lrh, service_level), + .offset_words = 0, + .offset_bits = 8, + .size_bits = 4 }, + { RESERVED, + .offset_words = 0, + .offset_bits = 12, + .size_bits = 2 }, + { STRUCT_FIELD(lrh, link_next_header), + .offset_words = 0, + .offset_bits = 14, + .size_bits = 2 }, + { STRUCT_FIELD(lrh, destination_lid), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 }, + { RESERVED, + .offset_words = 1, + .offset_bits = 0, + .size_bits = 5 }, + { STRUCT_FIELD(lrh, packet_length), + .offset_words = 1, + .offset_bits = 5, + .size_bits = 11 }, + { STRUCT_FIELD(lrh, source_lid), + .offset_words = 1, + .offset_bits = 16, + .size_bits = 16 } +}; + +static const struct ib_field eth_table[] = { + { STRUCT_FIELD(eth, dmac_h), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 32 }, + { STRUCT_FIELD(eth, dmac_l), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(eth, smac_h), + .offset_words = 1, + .offset_bits = 16, + .size_bits = 16 }, + { STRUCT_FIELD(eth, smac_l), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 32 }, + { STRUCT_FIELD(eth, type), + .offset_words = 3, + .offset_bits = 0, + .size_bits = 16 } +}; + +static const struct ib_field vlan_table[] = { + { STRUCT_FIELD(vlan, tag), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(vlan, type), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 } +}; + +static const struct ib_field ip4_table[] = { + { STRUCT_FIELD(ip4, ver), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 4 }, + { STRUCT_FIELD(ip4, hdr_len), + .offset_words = 0, + .offset_bits = 4, + .size_bits = 4 }, + { STRUCT_FIELD(ip4, tos), + .offset_words = 0, + .offset_bits = 8, + .size_bits = 8 }, + { STRUCT_FIELD(ip4, tot_len), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 }, + { STRUCT_FIELD(ip4, id), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(ip4, frag_off), + .offset_words = 1, + .offset_bits = 16, + .size_bits = 16 }, + { STRUCT_FIELD(ip4, ttl), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 8 }, + { STRUCT_FIELD(ip4, protocol), + .offset_words = 2, + .offset_bits = 8, + .size_bits = 8 }, + { STRUCT_FIELD(ip4, check), + .offset_words = 2, + .offset_bits = 16, + .size_bits = 16 }, + { STRUCT_FIELD(ip4, saddr), + .offset_words = 3, + .offset_bits = 0, + .size_bits = 32 }, + { STRUCT_FIELD(ip4, daddr), + .offset_words = 4, + .offset_bits = 0, + .size_bits = 32 } +}; + +static const struct ib_field udp_table[] = { + { 
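+ /* As with the tables above, each ib_field entry pairs a struct member + * with its bit offset and width in the wire image; the generic + * ib_pack()/ib_unpack() helpers interpret these tables at run time. */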
STRUCT_FIELD(udp, sport), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(udp, dport), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 }, + { STRUCT_FIELD(udp, length), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(udp, csum), + .offset_words = 1, + .offset_bits = 16, + .size_bits = 16 } +}; + +static const struct ib_field grh_table[] = { + { STRUCT_FIELD(grh, ip_version), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 4 }, + { STRUCT_FIELD(grh, traffic_class), + .offset_words = 0, + .offset_bits = 4, + .size_bits = 8 }, + { STRUCT_FIELD(grh, flow_label), + .offset_words = 0, + .offset_bits = 12, + .size_bits = 20 }, + { STRUCT_FIELD(grh, payload_length), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(grh, next_header), + .offset_words = 1, + .offset_bits = 16, + .size_bits = 8 }, + { STRUCT_FIELD(grh, hop_limit), + .offset_words = 1, + .offset_bits = 24, + .size_bits = 8 }, + { STRUCT_FIELD(grh, source_gid), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 128 }, + { STRUCT_FIELD(grh, destination_gid), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 128 } +}; + +static const struct ib_field bth_table[] = { + { STRUCT_FIELD(bth, opcode), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 8 }, + { STRUCT_FIELD(bth, solicited_event), + .offset_words = 0, + .offset_bits = 8, + .size_bits = 1 }, + { STRUCT_FIELD(bth, mig_req), + .offset_words = 0, + .offset_bits = 9, + .size_bits = 1 }, + { STRUCT_FIELD(bth, pad_count), + .offset_words = 0, + .offset_bits = 10, + .size_bits = 2 }, + { STRUCT_FIELD(bth, transport_header_version), + .offset_words = 0, + .offset_bits = 12, + .size_bits = 4 }, + { STRUCT_FIELD(bth, pkey), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 }, + { RESERVED, + .offset_words = 1, + .offset_bits = 0, + .size_bits = 8 }, + { STRUCT_FIELD(bth, destination_qpn), + .offset_words = 1, + .offset_bits = 8, + .size_bits = 24 }, + { STRUCT_FIELD(bth, ack_req), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 1 }, + { RESERVED, + .offset_words = 2, + .offset_bits = 1, + .size_bits = 7 }, + { STRUCT_FIELD(bth, psn), + .offset_words = 2, + .offset_bits = 8, + .size_bits = 24 } +}; + +static const struct ib_field deth_table[] = { + { STRUCT_FIELD(deth, qkey), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 32 }, + { RESERVED, + .offset_words = 1, + .offset_bits = 0, + .size_bits = 8 }, + { STRUCT_FIELD(deth, source_qpn), + .offset_words = 1, + .offset_bits = 8, + .size_bits = 24 } +}; + +__sum16 ib_ud_ip4_csum(struct ib_ud_header *header) +{ + struct ip iph; + + iph.ip_hl = 5; + iph.ip_v = 4; + iph.ip_tos = header->ip4.tos; + iph.ip_len = header->ip4.tot_len; + iph.ip_id = header->ip4.id; + iph.ip_off = header->ip4.frag_off; + iph.ip_ttl = header->ip4.ttl; + iph.ip_p = header->ip4.protocol; + iph.ip_sum = 0; + iph.ip_src.s_addr = header->ip4.saddr; + iph.ip_dst.s_addr = header->ip4.daddr; + + return in_cksum_hdr(&iph); +} +EXPORT_SYMBOL(ib_ud_ip4_csum); + +/** + * ib_ud_header_init - Initialize UD header structure + * @payload_bytes:Length of packet payload + * @lrh_present: specify if LRH is present + * @eth_present: specify if Eth header is present + * @vlan_present: packet is tagged vlan + * @grh_present: GRH flag (if non-zero, GRH will be included) + * @ip_version: if non-zero, IP header, V4 or V6, will be included + * @udp_present :if non-zero, UDP header will be included + * @immediate_present: specify if immediate 
data is present + * @header:Structure to initialize + */ +int ib_ud_header_init(int payload_bytes, + int lrh_present, + int eth_present, + int vlan_present, + int grh_present, + int ip_version, + int udp_present, + int immediate_present, + struct ib_ud_header *header) +{ + size_t udp_bytes = udp_present ? IB_UDP_BYTES : 0; + + grh_present = grh_present && !ip_version; + memset(header, 0, sizeof *header); + + /* + * UDP header without IP header doesn't make sense + */ + if (udp_present && ip_version != 4 && ip_version != 6) + return -EINVAL; + + if (lrh_present) { + u16 packet_length; + + header->lrh.link_version = 0; + header->lrh.link_next_header = + grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; + packet_length = (IB_LRH_BYTES + + IB_BTH_BYTES + + IB_DETH_BYTES + + (grh_present ? IB_GRH_BYTES : 0) + + payload_bytes + + 4 + /* ICRC */ + 3) / 4; /* round up */ + header->lrh.packet_length = cpu_to_be16(packet_length); + } + + if (vlan_present) + header->eth.type = cpu_to_be16(ETH_P_8021Q); + + if (ip_version == 6 || grh_present) { + header->grh.ip_version = 6; + header->grh.payload_length = + cpu_to_be16((udp_bytes + + IB_BTH_BYTES + + IB_DETH_BYTES + + payload_bytes + + 4 + /* ICRC */ + 3) & ~3); /* round up */ + header->grh.next_header = udp_present ? IPPROTO_UDP : 0x1b; + } + + if (ip_version == 4) { + header->ip4.ver = 4; /* version 4 */ + header->ip4.hdr_len = 5; /* 5 words */ + header->ip4.tot_len = + cpu_to_be16(IB_IP4_BYTES + + udp_bytes + + IB_BTH_BYTES + + IB_DETH_BYTES + + payload_bytes + + 4); /* ICRC */ + header->ip4.protocol = IPPROTO_UDP; + } + if (udp_present && ip_version) + header->udp.length = + cpu_to_be16(IB_UDP_BYTES + + IB_BTH_BYTES + + IB_DETH_BYTES + + payload_bytes + + 4); /* ICRC */ + + if (immediate_present) + header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + else + header->bth.opcode = IB_OPCODE_UD_SEND_ONLY; + header->bth.pad_count = (4 - payload_bytes) & 3; + header->bth.transport_header_version = 0; + + header->lrh_present = lrh_present; + header->eth_present = eth_present; + header->vlan_present = vlan_present; + header->grh_present = grh_present || (ip_version == 6); + header->ipv4_present = ip_version == 4; + header->udp_present = udp_present; + header->immediate_present = immediate_present; + return 0; +} +EXPORT_SYMBOL(ib_ud_header_init); + +/** + * ib_ud_header_pack - Pack UD header struct into wire format + * @header:UD header struct + * @buf:Buffer to pack into + * + * ib_ud_header_pack() packs the UD header structure @header into wire + * format in the buffer @buf. 
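+ * + * Minimal usage sketch (illustrative values only, error checking + * omitted): + * + * struct ib_ud_header hdr; + * u8 buf[IB_LRH_BYTES + IB_GRH_BYTES + IB_BTH_BYTES + IB_DETH_BYTES]; + * int len; + * + * ib_ud_header_init(0, 1, 0, 0, 1, 0, 0, 0, &hdr); + * hdr.lrh.destination_lid = cpu_to_be16(0x1234); + * len = ib_ud_header_pack(&hdr, buf); + * + * Here len equals sizeof(buf): only the LRH and GRH were enabled and + * the payload is empty, so exactly the four fixed headers are written.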
+ */ +int ib_ud_header_pack(struct ib_ud_header *header, + void *buf) +{ + int len = 0; + + if (header->lrh_present) { + ib_pack(lrh_table, ARRAY_SIZE(lrh_table), + &header->lrh, (char *)buf + len); + len += IB_LRH_BYTES; + } + if (header->eth_present) { + ib_pack(eth_table, ARRAY_SIZE(eth_table), + &header->eth, (char *)buf + len); + len += IB_ETH_BYTES; + } + if (header->vlan_present) { + ib_pack(vlan_table, ARRAY_SIZE(vlan_table), + &header->vlan, (char *)buf + len); + len += IB_VLAN_BYTES; + } + if (header->grh_present) { + ib_pack(grh_table, ARRAY_SIZE(grh_table), + &header->grh, (char *)buf + len); + len += IB_GRH_BYTES; + } + if (header->ipv4_present) { + ib_pack(ip4_table, ARRAY_SIZE(ip4_table), + &header->ip4, (char *)buf + len); + len += IB_IP4_BYTES; + } + if (header->udp_present) { + ib_pack(udp_table, ARRAY_SIZE(udp_table), + &header->udp, (char *)buf + len); + len += IB_UDP_BYTES; + } + + ib_pack(bth_table, ARRAY_SIZE(bth_table), + &header->bth, (char *)buf + len); + len += IB_BTH_BYTES; + + ib_pack(deth_table, ARRAY_SIZE(deth_table), + &header->deth, (char *)buf + len); + len += IB_DETH_BYTES; + + if (header->immediate_present) { + memcpy((char *)buf + len, &header->immediate_data, sizeof header->immediate_data); + len += sizeof header->immediate_data; + } + + return len; +} +EXPORT_SYMBOL(ib_ud_header_pack); + +/** + * ib_ud_header_unpack - Unpack UD header struct from wire format + * @header:UD header struct + * @buf:Buffer to unpack from + * + * ib_ud_header_unpack() unpacks the UD header structure @header from wire + * format in the buffer @buf. + */ +int ib_ud_header_unpack(void *buf, + struct ib_ud_header *header) +{ + ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), + buf, &header->lrh); + buf = (char *)buf + IB_LRH_BYTES; + + if (header->lrh.link_version != 0) { + pr_warn("Invalid LRH.link_version %d\n", + header->lrh.link_version); + return -EINVAL; + } + + switch (header->lrh.link_next_header) { + case IB_LNH_IBA_LOCAL: + header->grh_present = 0; + break; + + case IB_LNH_IBA_GLOBAL: + header->grh_present = 1; + ib_unpack(grh_table, ARRAY_SIZE(grh_table), + buf, &header->grh); + buf = (char *)buf + IB_GRH_BYTES; + + if (header->grh.ip_version != 6) { + pr_warn("Invalid GRH.ip_version %d\n", + header->grh.ip_version); + return -EINVAL; + } + if (header->grh.next_header != 0x1b) { + pr_warn("Invalid GRH.next_header 0x%02x\n", + header->grh.next_header); + return -EINVAL; + } + break; + + default: + pr_warn("Invalid LRH.link_next_header %d\n", + header->lrh.link_next_header); + return -EINVAL; + } + + ib_unpack(bth_table, ARRAY_SIZE(bth_table), + buf, &header->bth); + buf = (char *)buf + IB_BTH_BYTES; + + switch (header->bth.opcode) { + case IB_OPCODE_UD_SEND_ONLY: + header->immediate_present = 0; + break; + case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE: + header->immediate_present = 1; + break; + default: + pr_warn("Invalid BTH.opcode 0x%02x\n", header->bth.opcode); + return -EINVAL; + } + + if (header->bth.transport_header_version != 0) { + pr_warn("Invalid BTH.transport_header_version %d\n", + header->bth.transport_header_version); + return -EINVAL; + } + + ib_unpack(deth_table, ARRAY_SIZE(deth_table), + buf, &header->deth); + buf = (char *)buf + IB_DETH_BYTES; + + if (header->immediate_present) + memcpy(&header->immediate_data, buf, sizeof header->immediate_data); + + return 0; +} +EXPORT_SYMBOL(ib_ud_header_unpack); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ud_header.c ___________________________________________________________________
Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_user_mad.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_user_mad.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_user_mad.c (revision 320592) @@ -0,0 +1,1404 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2008 Cisco. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "user_mad: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("InfiniBand userspace MAD packet access"); +MODULE_LICENSE("Dual BSD/GPL"); + +enum { + IB_UMAD_MAX_PORTS = 64, + IB_UMAD_MAX_AGENTS = 32, + + IB_UMAD_MAJOR = 231, + IB_UMAD_MINOR_BASE = 0 +}; + +/* + * Our lifetime rules for these structs are the following: + * device special file is opened, we take a reference on the + * ib_umad_port's struct ib_umad_device. We drop these + * references in the corresponding close(). + * + * In addition to references coming from open character devices, there + * is one more reference to each ib_umad_device representing the + * module's reference taken when allocating the ib_umad_device in + * ib_umad_add_one(). + * + * When destroying an ib_umad_device, we drop the module's reference. 
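+ * + * Summarized: the kobject's refcount equals the number of open device + * files plus one for the module; the final kobject_put() (last close + * or ib_umad_remove_one) frees the device through + * ib_umad_release_dev().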
+ */ + +struct ib_umad_port { + struct cdev cdev; + struct device *dev; + + struct cdev sm_cdev; + struct device *sm_dev; + struct semaphore sm_sem; + + struct mutex file_mutex; + struct list_head file_list; + + struct ib_device *ib_dev; + struct ib_umad_device *umad_dev; + int dev_num; + u8 port_num; +}; + +struct ib_umad_device { + struct kobject kobj; + struct ib_umad_port port[0]; +}; + +struct ib_umad_file { + struct mutex mutex; + struct ib_umad_port *port; + struct file *filp; + struct list_head recv_list; + struct list_head send_list; + struct list_head port_list; + spinlock_t send_lock; + wait_queue_head_t recv_wait; + struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; + int agents_dead; + u8 use_pkey_index; + u8 already_used; +}; + +struct ib_umad_packet { + struct ib_mad_send_buf *msg; + struct ib_mad_recv_wc *recv_wc; + struct list_head list; + int length; + struct ib_user_mad mad; +}; + +static struct class *umad_class; + +static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); + +static DEFINE_SPINLOCK(port_lock); +static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); + +static void ib_umad_add_one(struct ib_device *device); +static void ib_umad_remove_one(struct ib_device *device, void *client_data); + +static void ib_umad_release_dev(struct kobject *kobj) +{ + struct ib_umad_device *dev = + container_of(kobj, struct ib_umad_device, kobj); + + kfree(dev); +} + +static struct kobj_type ib_umad_dev_ktype = { + .release = ib_umad_release_dev, +}; + +static int hdr_size(struct ib_umad_file *file) +{ + return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : + sizeof (struct ib_user_mad_hdr_old); +} + +/* caller must hold file->mutex */ +static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) +{ + return file->agents_dead ? 
NULL : file->agent[id]; +} + +static int queue_packet(struct ib_umad_file *file, + struct ib_mad_agent *agent, + struct ib_umad_packet *packet) +{ + int ret = 1; + + mutex_lock(&file->mutex); + + for (packet->mad.hdr.id = 0; + packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; + packet->mad.hdr.id++) + if (agent == __get_agent(file, packet->mad.hdr.id)) { + list_add_tail(&packet->list, &file->recv_list); + wake_up_interruptible(&file->recv_wait); + linux_poll_wakeup(file->filp); + ret = 0; + break; + } + + mutex_unlock(&file->mutex); + + return ret; +} + +static void dequeue_send(struct ib_umad_file *file, + struct ib_umad_packet *packet) +{ + spin_lock_irq(&file->send_lock); + list_del(&packet->list); + spin_unlock_irq(&file->send_lock); +} + +static void send_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *send_wc) +{ + struct ib_umad_file *file = agent->context; + struct ib_umad_packet *packet = send_wc->send_buf->context[0]; + + dequeue_send(file, packet); + ib_destroy_ah(packet->msg->ah); + ib_free_send_mad(packet->msg); + + if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { + packet->length = IB_MGMT_MAD_HDR; + packet->mad.hdr.status = ETIMEDOUT; + if (!queue_packet(file, agent, packet)) + return; + } + kfree(packet); +} + +static void recv_handler(struct ib_mad_agent *agent, + struct ib_mad_send_buf *send_buf, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_umad_file *file = agent->context; + struct ib_umad_packet *packet; + + if (mad_recv_wc->wc->status != IB_WC_SUCCESS) + goto err1; + + packet = kzalloc(sizeof *packet, GFP_KERNEL); + if (!packet) + goto err1; + + packet->length = mad_recv_wc->mad_len; + packet->recv_wc = mad_recv_wc; + + packet->mad.hdr.status = 0; + packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; + packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); + packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); + packet->mad.hdr.sl = mad_recv_wc->wc->sl; + packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; + packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; + packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); + if (packet->mad.hdr.grh_present) { + struct ib_ah_attr ah_attr; + + ib_init_ah_from_wc(agent->device, agent->port_num, + mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, + &ah_attr); + + packet->mad.hdr.gid_index = ah_attr.grh.sgid_index; + packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit; + packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class; + memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16); + packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label); + } + + if (queue_packet(file, agent, packet)) + goto err2; + return; + +err2: + kfree(packet); +err1: + ib_free_recv_mad(mad_recv_wc); +} + +static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, + struct ib_umad_packet *packet, size_t count) +{ + struct ib_mad_recv_buf *recv_buf; + int left, seg_payload, offset, max_seg_payload; + size_t seg_size; + + recv_buf = &packet->recv_wc->recv_buf; + seg_size = packet->recv_wc->mad_seg_size; + + /* We need enough room to copy the first (or only) MAD segment. 
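+ * In other words, count must cover hdr_size(file) plus + * min(packet->length, seg_size); e.g. reading a single 256-byte MAD + * through the pkey_index-aware ABI needs at least 64 + 256 bytes of + * user buffer (the newer struct ib_user_mad_hdr is 64 bytes).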
*/ + if ((packet->length <= seg_size && + count < hdr_size(file) + packet->length) || + (packet->length > seg_size && + count < hdr_size(file) + seg_size)) + return -EINVAL; + + if (copy_to_user(buf, &packet->mad, hdr_size(file))) + return -EFAULT; + + buf += hdr_size(file); + seg_payload = min_t(int, packet->length, seg_size); + if (copy_to_user(buf, recv_buf->mad, seg_payload)) + return -EFAULT; + + if (seg_payload < packet->length) { + /* + * Multipacket RMPP MAD message. Copy remainder of message. + * Note that last segment may have a shorter payload. + */ + if (count < hdr_size(file) + packet->length) { + /* + * The buffer is too small, return the first RMPP segment, + * which includes the RMPP message length. + */ + return -ENOSPC; + } + offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class); + max_seg_payload = seg_size - offset; + + for (left = packet->length - seg_payload, buf += seg_payload; + left; left -= seg_payload, buf += seg_payload) { + recv_buf = container_of(recv_buf->list.next, + struct ib_mad_recv_buf, list); + seg_payload = min(left, max_seg_payload); + if (copy_to_user(buf, (char *)recv_buf->mad + offset, + seg_payload)) + return -EFAULT; + } + } + return hdr_size(file) + packet->length; +} + +static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf, + struct ib_umad_packet *packet, size_t count) +{ + ssize_t size = hdr_size(file) + packet->length; + + if (count < size) + return -EINVAL; + + if (copy_to_user(buf, &packet->mad, hdr_size(file))) + return -EFAULT; + + buf += hdr_size(file); + + if (copy_to_user(buf, packet->mad.data, packet->length)) + return -EFAULT; + + return size; +} + +static ssize_t ib_umad_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_umad_file *file = filp->private_data; + struct ib_umad_packet *packet; + ssize_t ret; + + if (count < hdr_size(file)) + return -EINVAL; + + mutex_lock(&file->mutex); + + while (list_empty(&file->recv_list)) { + mutex_unlock(&file->mutex); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(file->recv_wait, + !list_empty(&file->recv_list))) + return -ERESTARTSYS; + + mutex_lock(&file->mutex); + } + + packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); + list_del(&packet->list); + + mutex_unlock(&file->mutex); + + if (packet->recv_wc) + ret = copy_recv_mad(file, buf, packet, count); + else + ret = copy_send_mad(file, buf, packet, count); + + if (ret < 0) { + /* Requeue packet */ + mutex_lock(&file->mutex); + list_add(&packet->list, &file->recv_list); + mutex_unlock(&file->mutex); + } else { + if (packet->recv_wc) + ib_free_recv_mad(packet->recv_wc); + kfree(packet); + } + return ret; +} + +static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf) +{ + int left, seg; + + /* Copy class specific header */ + if ((msg->hdr_len > IB_MGMT_RMPP_HDR) && + copy_from_user((char *)msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR, + msg->hdr_len - IB_MGMT_RMPP_HDR)) + return -EFAULT; + + /* All headers are in place. Copy data segments. 
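+ * Each pass copies up to msg->seg_size payload bytes from the user + * buffer into the segment returned by ib_get_rmpp_segment() for that + * segment number; only the final segment may be short (left < + * msg->seg_size).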
*/ + for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0; + seg++, left -= msg->seg_size, buf += msg->seg_size) { + if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf, + min(left, msg->seg_size))) + return -EFAULT; + } + return 0; +} + +static int same_destination(struct ib_user_mad_hdr *hdr1, + struct ib_user_mad_hdr *hdr2) +{ + if (!hdr1->grh_present && !hdr2->grh_present) + return (hdr1->lid == hdr2->lid); + + if (hdr1->grh_present && hdr2->grh_present) + return !memcmp(hdr1->gid, hdr2->gid, 16); + + return 0; +} + +static int is_duplicate(struct ib_umad_file *file, + struct ib_umad_packet *packet) +{ + struct ib_umad_packet *sent_packet; + struct ib_mad_hdr *sent_hdr, *hdr; + + hdr = (struct ib_mad_hdr *) packet->mad.data; + list_for_each_entry(sent_packet, &file->send_list, list) { + sent_hdr = (struct ib_mad_hdr *) sent_packet->mad.data; + + if ((hdr->tid != sent_hdr->tid) || + (hdr->mgmt_class != sent_hdr->mgmt_class)) + continue; + + /* + * No need to be overly clever here. If two new operations have + * the same TID, reject the second as a duplicate. This is more + * restrictive than required by the spec. + */ + if (!ib_response_mad(hdr)) { + if (!ib_response_mad(sent_hdr)) + return 1; + continue; + } else if (!ib_response_mad(sent_hdr)) + continue; + + if (same_destination(&packet->mad.hdr, &sent_packet->mad.hdr)) + return 1; + } + + return 0; +} + +static ssize_t ib_umad_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_umad_file *file = filp->private_data; + struct ib_umad_packet *packet; + struct ib_mad_agent *agent; + struct ib_ah_attr ah_attr; + struct ib_ah *ah; + struct ib_rmpp_mad *rmpp_mad; + __be64 *tid; + int ret, data_len, hdr_len, copy_offset, rmpp_active; + u8 base_version; + + if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) + return -EINVAL; + + packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); + if (!packet) + return -ENOMEM; + + if (copy_from_user(&packet->mad, buf, hdr_size(file))) { + ret = -EFAULT; + goto err; + } + + if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { + ret = -EINVAL; + goto err; + } + + buf += hdr_size(file); + + if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) { + ret = -EFAULT; + goto err; + } + + mutex_lock(&file->mutex); + + agent = __get_agent(file, packet->mad.hdr.id); + if (!agent) { + ret = -EINVAL; + goto err_up; + } + + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid); + ah_attr.sl = packet->mad.hdr.sl; + ah_attr.src_path_bits = packet->mad.hdr.path_bits; + ah_attr.port_num = file->port->port_num; + if (packet->mad.hdr.grh_present) { + ah_attr.ah_flags = IB_AH_GRH; + memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); + ah_attr.grh.sgid_index = packet->mad.hdr.gid_index; + ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); + ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; + ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; + } + + ah = ib_create_ah(agent->qp->pd, &ah_attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + goto err_up; + } + + rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; + hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); + + if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) + && ib_mad_kernel_rmpp_agent(agent)) { + copy_offset = IB_MGMT_RMPP_HDR; + rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE; + } else { + copy_offset = IB_MGMT_MAD_HDR; + rmpp_active = 0; + } + + base_version = ((struct ib_mad_hdr 
*)&packet->mad.data)->base_version; + data_len = count - hdr_size(file) - hdr_len; + packet->msg = ib_create_send_mad(agent, + be32_to_cpu(packet->mad.hdr.qpn), + packet->mad.hdr.pkey_index, rmpp_active, + hdr_len, data_len, GFP_KERNEL, + base_version); + if (IS_ERR(packet->msg)) { + ret = PTR_ERR(packet->msg); + goto err_ah; + } + + packet->msg->ah = ah; + packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; + packet->msg->retries = packet->mad.hdr.retries; + packet->msg->context[0] = packet; + + /* Copy MAD header. Any RMPP header is already in place. */ + memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); + + if (!rmpp_active) { + if (copy_from_user((char *)packet->msg->mad + copy_offset, + buf + copy_offset, + hdr_len + data_len - copy_offset)) { + ret = -EFAULT; + goto err_msg; + } + } else { + ret = copy_rmpp_mad(packet->msg, buf); + if (ret) + goto err_msg; + } + + /* + * Set the high-order part of the transaction ID to make MADs from + * different agents unique, and allow routing responses back to the + * original requestor. + */ + if (!ib_response_mad(packet->msg->mad)) { + tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; + *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | + (be64_to_cpup(tid) & 0xffffffff)); + rmpp_mad->mad_hdr.tid = *tid; + } + + if (!ib_mad_kernel_rmpp_agent(agent) + && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) + && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { + spin_lock_irq(&file->send_lock); + list_add_tail(&packet->list, &file->send_list); + spin_unlock_irq(&file->send_lock); + } else { + spin_lock_irq(&file->send_lock); + ret = is_duplicate(file, packet); + if (!ret) + list_add_tail(&packet->list, &file->send_list); + spin_unlock_irq(&file->send_lock); + if (ret) { + ret = -EINVAL; + goto err_msg; + } + } + + ret = ib_post_send_mad(packet->msg, NULL); + if (ret) + goto err_send; + + mutex_unlock(&file->mutex); + return count; + +err_send: + dequeue_send(file, packet); +err_msg: + ib_free_send_mad(packet->msg); +err_ah: + ib_destroy_ah(ah); +err_up: + mutex_unlock(&file->mutex); +err: + kfree(packet); + return ret; +} + +static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wait) +{ + struct ib_umad_file *file = filp->private_data; + + /* we will always be able to post a MAD send */ + unsigned int mask = POLLOUT | POLLWRNORM; + + poll_wait(filp, &file->recv_wait, wait); + + if (!list_empty(&file->recv_list)) + mask |= POLLIN | POLLRDNORM; + + return mask; +} + +static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, + int compat_method_mask) +{ + struct ib_user_mad_reg_req ureq; + struct ib_mad_reg_req req; + struct ib_mad_agent *agent = NULL; + int agent_id; + int ret; + + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); + + if (!file->port->ib_dev) { + dev_notice(file->port->dev, + "ib_umad_reg_agent: invalid device\n"); + ret = -EPIPE; + goto out; + } + + if (copy_from_user(&ureq, arg, sizeof ureq)) { + ret = -EFAULT; + goto out; + } + + if (ureq.qpn != 0 && ureq.qpn != 1) { + dev_notice(file->port->dev, + "ib_umad_reg_agent: invalid QPN %d specified\n", + ureq.qpn); + ret = -EINVAL; + goto out; + } + + for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) + if (!__get_agent(file, agent_id)) + goto found; + + dev_notice(file->port->dev, + "ib_umad_reg_agent: Max Agents (%u) reached\n", + IB_UMAD_MAX_AGENTS); + ret = -ENOMEM; + goto out; + +found: + if (ureq.mgmt_class) { + memset(&req, 0, sizeof(req)); + req.mgmt_class = ureq.mgmt_class; + 
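/* + * With a 32-bit compat caller (compat_method_mask set), the method + * mask arrives as pairs of 32-bit words; the loop below folds each + * pair into one 64-bit entry, i.e. + * + * req.method_mask[i] = umm[2 * i] | (u64)umm[2 * i + 1] << 32; + */ +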
req.mgmt_class_version = ureq.mgmt_class_version; + memcpy(req.oui, ureq.oui, sizeof req.oui); + + if (compat_method_mask) { + u32 *umm = (u32 *) ureq.method_mask; + int i; + + for (i = 0; i < BITS_TO_LONGS(IB_MGMT_MAX_METHODS); ++i) + req.method_mask[i] = + umm[i * 2] | ((u64) umm[i * 2 + 1] << 32); + } else + memcpy(req.method_mask, ureq.method_mask, + sizeof req.method_mask); + } + + agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, + ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, + ureq.mgmt_class ? &req : NULL, + ureq.rmpp_version, + send_handler, recv_handler, file, 0); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + agent = NULL; + goto out; + } + + if (put_user(agent_id, + (u32 __user *) ((char *)arg + offsetof(struct ib_user_mad_reg_req, id)))) { + ret = -EFAULT; + goto out; + } + + if (!file->already_used) { + file->already_used = 1; + if (!file->use_pkey_index) { + dev_warn(file->port->dev, + "process %s did not enable P_Key index support.\n", + current->comm); + dev_warn(file->port->dev, + " Documentation/infiniband/user_mad.txt has info on the new ABI.\n"); + } + } + + file->agent[agent_id] = agent; + ret = 0; + +out: + mutex_unlock(&file->mutex); + + if (ret && agent) + ib_unregister_mad_agent(agent); + + mutex_unlock(&file->port->file_mutex); + + return ret; +} + +static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) +{ + struct ib_user_mad_reg_req2 ureq; + struct ib_mad_reg_req req; + struct ib_mad_agent *agent = NULL; + int agent_id; + int ret; + + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); + + if (!file->port->ib_dev) { + dev_notice(file->port->dev, + "ib_umad_reg_agent2: invalid device\n"); + ret = -EPIPE; + goto out; + } + + if (copy_from_user(&ureq, arg, sizeof(ureq))) { + ret = -EFAULT; + goto out; + } + + if (ureq.qpn != 0 && ureq.qpn != 1) { + dev_notice(file->port->dev, + "ib_umad_reg_agent2: invalid QPN %d specified\n", + ureq.qpn); + ret = -EINVAL; + goto out; + } + + if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) { + const u32 flags = IB_USER_MAD_REG_FLAGS_CAP; + dev_notice(file->port->dev, + "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n", + ureq.flags, IB_USER_MAD_REG_FLAGS_CAP); + ret = -EINVAL; + + if (put_user(flags, + (u32 __user *) ((char *)arg + offsetof(struct + ib_user_mad_reg_req2, flags)))) + ret = -EFAULT; + + goto out; + } + + for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) + if (!__get_agent(file, agent_id)) + goto found; + + dev_notice(file->port->dev, + "ib_umad_reg_agent2: Max Agents (%u) reached\n", + IB_UMAD_MAX_AGENTS); + ret = -ENOMEM; + goto out; + +found: + if (ureq.mgmt_class) { + memset(&req, 0, sizeof(req)); + req.mgmt_class = ureq.mgmt_class; + req.mgmt_class_version = ureq.mgmt_class_version; + if (ureq.oui & 0xff000000) { + dev_notice(file->port->dev, + "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n", + ureq.oui); + ret = -EINVAL; + goto out; + } + req.oui[2] = ureq.oui & 0x0000ff; + req.oui[1] = (ureq.oui & 0x00ff00) >> 8; + req.oui[0] = (ureq.oui & 0xff0000) >> 16; + memcpy(req.method_mask, ureq.method_mask, + sizeof(req.method_mask)); + } + + agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, + ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, + ureq.mgmt_class ? 
&req : NULL, + ureq.rmpp_version, + send_handler, recv_handler, file, + ureq.flags); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + agent = NULL; + goto out; + } + + if (put_user(agent_id, + (u32 __user *)((char *)arg + + offsetof(struct ib_user_mad_reg_req2, id)))) { + ret = -EFAULT; + goto out; + } + + if (!file->already_used) { + file->already_used = 1; + file->use_pkey_index = 1; + } + + file->agent[agent_id] = agent; + ret = 0; + +out: + mutex_unlock(&file->mutex); + + if (ret && agent) + ib_unregister_mad_agent(agent); + + mutex_unlock(&file->port->file_mutex); + + return ret; +} + + +static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) +{ + struct ib_mad_agent *agent = NULL; + u32 id; + int ret = 0; + + if (get_user(id, arg)) + return -EFAULT; + + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); + + if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { + ret = -EINVAL; + goto out; + } + + agent = file->agent[id]; + file->agent[id] = NULL; + +out: + mutex_unlock(&file->mutex); + + if (agent) + ib_unregister_mad_agent(agent); + + mutex_unlock(&file->port->file_mutex); + + return ret; +} + +static long ib_umad_enable_pkey(struct ib_umad_file *file) +{ + int ret = 0; + + mutex_lock(&file->mutex); + if (file->already_used) + ret = -EINVAL; + else + file->use_pkey_index = 1; + mutex_unlock(&file->mutex); + + return ret; +} + +static long ib_umad_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case IB_USER_MAD_REGISTER_AGENT: + return ib_umad_reg_agent(filp->private_data, (void __user *) arg, 0); + case IB_USER_MAD_UNREGISTER_AGENT: + return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg); + case IB_USER_MAD_ENABLE_PKEY: + return ib_umad_enable_pkey(filp->private_data); + case IB_USER_MAD_REGISTER_AGENT2: + return ib_umad_reg_agent2(filp->private_data, (void __user *) arg); + default: + return -ENOIOCTLCMD; + } +} + +#ifdef CONFIG_COMPAT +static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case IB_USER_MAD_REGISTER_AGENT: + return ib_umad_reg_agent(filp->private_data, compat_ptr(arg), 1); + case IB_USER_MAD_UNREGISTER_AGENT: + return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg)); + case IB_USER_MAD_ENABLE_PKEY: + return ib_umad_enable_pkey(filp->private_data); + case IB_USER_MAD_REGISTER_AGENT2: + return ib_umad_reg_agent2(filp->private_data, compat_ptr(arg)); + default: + return -ENOIOCTLCMD; + } +} +#endif + +/* + * ib_umad_open() does not need the BKL: + * + * - the ib_umad_port structures are properly reference counted, and + * everything else is purely local to the file being created, so + * races against other open calls are not a problem; + * - the ioctl method does not affect any global state outside of the + * file structure being operated on; + */ +static int ib_umad_open(struct inode *inode, struct file *filp) +{ + struct ib_umad_port *port; + struct ib_umad_file *file; + int ret = -ENXIO; + + port = container_of(inode->i_cdev->si_drv1, struct ib_umad_port, cdev); + + mutex_lock(&port->file_mutex); + + if (!port->ib_dev) + goto out; + + ret = -ENOMEM; + file = kzalloc(sizeof *file, GFP_KERNEL); + if (!file) + goto out; + + mutex_init(&file->mutex); + spin_lock_init(&file->send_lock); + INIT_LIST_HEAD(&file->recv_list); + INIT_LIST_HEAD(&file->send_list); + init_waitqueue_head(&file->recv_wait); + + file->port = port; + file->filp = filp; + filp->private_data = file; + + list_add_tail(&file->port_list, 
&port->file_list); + + ret = nonseekable_open(inode, filp); + if (ret) { + list_del(&file->port_list); + kfree(file); + goto out; + } + + kobject_get(&port->umad_dev->kobj); + +out: + mutex_unlock(&port->file_mutex); + return ret; +} + +static int ib_umad_close(struct inode *inode, struct file *filp) +{ + struct ib_umad_file *file = filp->private_data; + struct ib_umad_device *dev = file->port->umad_dev; + struct ib_umad_packet *packet, *tmp; + int already_dead; + int i; + + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); + + already_dead = file->agents_dead; + file->agents_dead = 1; + + list_for_each_entry_safe(packet, tmp, &file->recv_list, list) { + if (packet->recv_wc) + ib_free_recv_mad(packet->recv_wc); + kfree(packet); + } + + list_del(&file->port_list); + + mutex_unlock(&file->mutex); + + if (!already_dead) + for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) + if (file->agent[i]) + ib_unregister_mad_agent(file->agent[i]); + + mutex_unlock(&file->port->file_mutex); + + kfree(file); + kobject_put(&dev->kobj); + + return 0; +} + +static const struct file_operations umad_fops = { + .owner = THIS_MODULE, + .read = ib_umad_read, + .write = ib_umad_write, + .poll = ib_umad_poll, + .unlocked_ioctl = ib_umad_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ib_umad_compat_ioctl, +#endif + .open = ib_umad_open, + .release = ib_umad_close, + .llseek = no_llseek, +}; + +static int ib_umad_sm_open(struct inode *inode, struct file *filp) +{ + struct ib_umad_port *port; + struct ib_port_modify props = { + .set_port_cap_mask = IB_PORT_SM + }; + int ret; + + port = container_of(inode->i_cdev->si_drv1, struct ib_umad_port, sm_cdev); + + if (filp->f_flags & O_NONBLOCK) { + if (down_trylock(&port->sm_sem)) { + ret = -EAGAIN; + goto fail; + } + } else { + if (down_interruptible(&port->sm_sem)) { + ret = -ERESTARTSYS; + goto fail; + } + } + + ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); + if (ret) + goto err_up_sem; + + filp->private_data = port; + + ret = nonseekable_open(inode, filp); + if (ret) + goto err_clr_sm_cap; + + kobject_get(&port->umad_dev->kobj); + + return 0; + +err_clr_sm_cap: + swap(props.set_port_cap_mask, props.clr_port_cap_mask); + ib_modify_port(port->ib_dev, port->port_num, 0, &props); + +err_up_sem: + up(&port->sm_sem); + +fail: + return ret; +} + +static int ib_umad_sm_close(struct inode *inode, struct file *filp) +{ + struct ib_umad_port *port = filp->private_data; + struct ib_port_modify props = { + .clr_port_cap_mask = IB_PORT_SM + }; + int ret = 0; + + mutex_lock(&port->file_mutex); + if (port->ib_dev) + ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); + mutex_unlock(&port->file_mutex); + + up(&port->sm_sem); + + kobject_put(&port->umad_dev->kobj); + + return ret; +} + +static const struct file_operations umad_sm_fops = { + .owner = THIS_MODULE, + .open = ib_umad_sm_open, + .release = ib_umad_sm_close, + .llseek = no_llseek, +}; + +static struct ib_client umad_client = { + .name = "umad", + .add = ib_umad_add_one, + .remove = ib_umad_remove_one +}; + +static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ib_umad_port *port = dev_get_drvdata(dev); + + if (!port) + return -ENODEV; + + return sprintf(buf, "%s\n", port->ib_dev->name); +} +static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); + +static ssize_t show_port(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ib_umad_port *port = dev_get_drvdata(dev); + + if (!port) + return -ENODEV; + + return sprintf(buf, 
"%d\n", port->port_num); +} +static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); + +static CLASS_ATTR_STRING(abi_version, S_IRUGO, + __stringify(IB_USER_MAD_ABI_VERSION)); + +static dev_t overflow_maj; +static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS); +static int find_overflow_devnum(struct ib_device *device) +{ + int ret; + + if (!overflow_maj) { + ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2, + "infiniband_mad"); + if (ret) { + dev_err(&device->dev, + "couldn't register dynamic device number\n"); + return ret; + } + } + + ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS); + if (ret >= IB_UMAD_MAX_PORTS) + return -1; + + return ret; +} + +static int ib_umad_init_port(struct ib_device *device, int port_num, + struct ib_umad_device *umad_dev, + struct ib_umad_port *port) +{ + int devnum; + dev_t base; + + spin_lock(&port_lock); + devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); + if (devnum >= IB_UMAD_MAX_PORTS) { + spin_unlock(&port_lock); + devnum = find_overflow_devnum(device); + if (devnum < 0) + return -1; + + spin_lock(&port_lock); + port->dev_num = devnum + IB_UMAD_MAX_PORTS; + base = devnum + overflow_maj; + set_bit(devnum, overflow_map); + } else { + port->dev_num = devnum; + base = devnum + base_dev; + set_bit(devnum, dev_map); + } + spin_unlock(&port_lock); + + port->ib_dev = device; + port->port_num = port_num; + sema_init(&port->sm_sem, 1); + mutex_init(&port->file_mutex); + INIT_LIST_HEAD(&port->file_list); + + cdev_init(&port->cdev, &umad_fops); + port->cdev.owner = THIS_MODULE; + port->cdev.kobj.parent = &umad_dev->kobj; + kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); + if (cdev_add(&port->cdev, base, 1)) + goto err_cdev; + + port->dev = device_create(umad_class, device->dma_device, + port->cdev.dev, port, + "umad%d", port->dev_num); + if (IS_ERR(port->dev)) + goto err_cdev; + + if (device_create_file(port->dev, &dev_attr_ibdev)) + goto err_dev; + if (device_create_file(port->dev, &dev_attr_port)) + goto err_dev; + + base += IB_UMAD_MAX_PORTS; + cdev_init(&port->sm_cdev, &umad_sm_fops); + port->sm_cdev.owner = THIS_MODULE; + port->sm_cdev.kobj.parent = &umad_dev->kobj; + kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); + if (cdev_add(&port->sm_cdev, base, 1)) + goto err_sm_cdev; + + port->sm_dev = device_create(umad_class, device->dma_device, + port->sm_cdev.dev, port, + "issm%d", port->dev_num); + if (IS_ERR(port->sm_dev)) + goto err_sm_cdev; + + if (device_create_file(port->sm_dev, &dev_attr_ibdev)) + goto err_sm_dev; + if (device_create_file(port->sm_dev, &dev_attr_port)) + goto err_sm_dev; + + return 0; + +err_sm_dev: + device_destroy(umad_class, port->sm_cdev.dev); + +err_sm_cdev: + cdev_del(&port->sm_cdev); + +err_dev: + device_destroy(umad_class, port->cdev.dev); + +err_cdev: + cdev_del(&port->cdev); + if (port->dev_num < IB_UMAD_MAX_PORTS) + clear_bit(devnum, dev_map); + else + clear_bit(devnum, overflow_map); + + return -1; +} + +static void ib_umad_kill_port(struct ib_umad_port *port) +{ + struct ib_umad_file *file; + int id; + + dev_set_drvdata(port->dev, NULL); + dev_set_drvdata(port->sm_dev, NULL); + + device_destroy(umad_class, port->cdev.dev); + device_destroy(umad_class, port->sm_cdev.dev); + + cdev_del(&port->cdev); + cdev_del(&port->sm_cdev); + + mutex_lock(&port->file_mutex); + + port->ib_dev = NULL; + + list_for_each_entry(file, &port->file_list, port_list) { + mutex_lock(&file->mutex); + file->agents_dead = 1; + mutex_unlock(&file->mutex); + + for (id = 0; id < 
IB_UMAD_MAX_AGENTS; ++id) + if (file->agent[id]) + ib_unregister_mad_agent(file->agent[id]); + } + + mutex_unlock(&port->file_mutex); + + if (port->dev_num < IB_UMAD_MAX_PORTS) + clear_bit(port->dev_num, dev_map); + else + clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map); +} + +static void ib_umad_add_one(struct ib_device *device) +{ + struct ib_umad_device *umad_dev; + int s, e, i; + int count = 0; + + s = rdma_start_port(device); + e = rdma_end_port(device); + + umad_dev = kzalloc(sizeof *umad_dev + + (e - s + 1) * sizeof (struct ib_umad_port), + GFP_KERNEL); + if (!umad_dev) + return; + + kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); + + for (i = s; i <= e; ++i) { + if (!rdma_cap_ib_mad(device, i)) + continue; + + umad_dev->port[i - s].umad_dev = umad_dev; + + if (ib_umad_init_port(device, i, umad_dev, + &umad_dev->port[i - s])) + goto err; + + count++; + } + + if (!count) + goto free; + + ib_set_client_data(device, &umad_client, umad_dev); + + return; + +err: + while (--i >= s) { + if (!rdma_cap_ib_mad(device, i)) + continue; + + ib_umad_kill_port(&umad_dev->port[i - s]); + } +free: + kobject_put(&umad_dev->kobj); +} + +static void ib_umad_remove_one(struct ib_device *device, void *client_data) +{ + struct ib_umad_device *umad_dev = client_data; + int i; + + if (!umad_dev) + return; + + for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) { + if (rdma_cap_ib_mad(device, i + rdma_start_port(device))) + ib_umad_kill_port(&umad_dev->port[i]); + } + + kobject_put(&umad_dev->kobj); +} + +static char *umad_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} + +static int __init ib_umad_init(void) +{ + int ret; + + ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, + "infiniband_mad"); + if (ret) { + pr_err("couldn't register device number\n"); + goto out; + } + + umad_class = class_create(THIS_MODULE, "infiniband_mad"); + if (IS_ERR(umad_class)) { + ret = PTR_ERR(umad_class); + pr_err("couldn't create class infiniband_mad\n"); + goto out_chrdev; + } + + umad_class->devnode = umad_devnode; + + ret = class_create_file(umad_class, &class_attr_abi_version.attr); + if (ret) { + pr_err("couldn't create abi_version attribute\n"); + goto out_class; + } + + ret = ib_register_client(&umad_client); + if (ret) { + pr_err("couldn't register ib_umad client\n"); + goto out_class; + } + + return 0; + +out_class: + class_destroy(umad_class); + +out_chrdev: + unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); + +out: + return ret; +} + +static void __exit ib_umad_cleanup(void) +{ + ib_unregister_client(&umad_client); + class_destroy(umad_class); + unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); + if (overflow_maj) + unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2); +} + +module_init_order(ib_umad_init, SI_ORDER_THIRD); +module_exit(ib_umad_cleanup); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_user_mad.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c (nonexistent) +++ 
projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c (revision 320592) @@ -0,0 +1,4251 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * Copyright (c) 2006 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define LINUXKPI_PARAM_PREFIX ibcore_ + +#include +#include +#include +#include +#include + +#include + +#include "uverbs.h" +#include "core_priv.h" + +#include + +struct uverbs_lock_class { + char name[16]; +}; + +static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" }; +static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" }; +static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" }; +static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" }; +static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; +static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; +static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; +static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; +static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; +static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" }; +static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" }; + +/* + * The ib_uobject locking scheme is as follows: + * + * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it + * needs to be held during all idr write operations. When an object is + * looked up, a reference must be taken on the object's kref before + * dropping this lock. For read operations, the lookup is likewise done + * under rcu_read_lock(), and similarly the kref reference is grabbed + * before the rcu_read_unlock(). + * + * - Each object also has an rwsem. This rwsem must be held for + * reading while an operation that uses the object is performed. + * For example, while registering an MR, the associated PD's + * uobject.mutex must be held for reading. The rwsem must be held + * for writing while initializing or destroying an object. 
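+ * + * A read-side caller therefore follows this pattern, using the + * helpers defined below: + * + * uobj = idr_read_uobj(idr, handle, context, 0); + * if (!uobj) + * return -EINVAL; (not found, dying, or wrong context) + * ... use uobj->object with the rwsem held for reading ... + * put_uobj_read(uobj);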
+ * + * - In addition, each object has a "live" flag. If this flag is not + * set, then lookups of the object will fail even if it is found in + * the idr. This handles a reader that blocks and does not acquire + * the rwsem until after the object is destroyed. The destroy + * operation will set the live flag to 0 and then drop the rwsem; + * this will allow the reader to acquire the rwsem, see that the + * live flag is 0, and then drop the rwsem and its reference to + * object. The underlying storage will not be freed until the last + * reference to the object is dropped. + */ + +static void init_uobj(struct ib_uobject *uobj, u64 user_handle, + struct ib_ucontext *context, struct uverbs_lock_class *c) +{ + uobj->user_handle = user_handle; + uobj->context = context; + kref_init(&uobj->ref); + init_rwsem(&uobj->mutex); + uobj->live = 0; +} + +static void release_uobj(struct kref *kref) +{ + kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu); +} + +static void put_uobj(struct ib_uobject *uobj) +{ + kref_put(&uobj->ref, release_uobj); +} + +static void put_uobj_read(struct ib_uobject *uobj) +{ + up_read(&uobj->mutex); + put_uobj(uobj); +} + +static void put_uobj_write(struct ib_uobject *uobj) +{ + up_write(&uobj->mutex); + put_uobj(uobj); +} + +static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) +{ + int ret; + + idr_preload(GFP_KERNEL); + spin_lock(&ib_uverbs_idr_lock); + + ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT); + if (ret >= 0) + uobj->id = ret; + + spin_unlock(&ib_uverbs_idr_lock); + idr_preload_end(); + + return ret < 0 ? ret : 0; +} + +void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) +{ + spin_lock(&ib_uverbs_idr_lock); + idr_remove(idr, uobj->id); + spin_unlock(&ib_uverbs_idr_lock); +} + +static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, + struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + rcu_read_lock(); + uobj = idr_find(idr, id); + if (uobj) { + if (uobj->context == context) + kref_get(&uobj->ref); + else + uobj = NULL; + } + rcu_read_unlock(); + + return uobj; +} + +static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, + struct ib_ucontext *context, int nested) +{ + struct ib_uobject *uobj; + + uobj = __idr_get_uobj(idr, id, context); + if (!uobj) + return NULL; + + if (nested) + down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING); + else + down_read(&uobj->mutex); + if (!uobj->live) { + put_uobj_read(uobj); + return NULL; + } + + return uobj; +} + +static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, + struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + uobj = __idr_get_uobj(idr, id, context); + if (!uobj) + return NULL; + + down_write(&uobj->mutex); + if (!uobj->live) { + put_uobj_write(uobj); + return NULL; + } + + return uobj; +} + +static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context, + int nested) +{ + struct ib_uobject *uobj; + + uobj = idr_read_uobj(idr, id, context, nested); + return uobj ? 
uobj->object : NULL; +} + +static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0); +} + +static void put_pd_read(struct ib_pd *pd) +{ + put_uobj_read(pd->uobject); +} + +static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested) +{ + return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested); +} + +static void put_cq_read(struct ib_cq *cq) +{ + put_uobj_read(cq->uobject); +} + +static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0); +} + +static void put_ah_read(struct ib_ah *ah) +{ + put_uobj_read(ah->uobject); +} + +static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); +} + +static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0); +} + +static void put_wq_read(struct ib_wq *wq) +{ + put_uobj_read(wq->uobject); +} + +static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, + struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0); +} + +static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table) +{ + put_uobj_read(ind_table->uobject); +} + +static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context); + return uobj ? uobj->object : NULL; +} + +static void put_qp_read(struct ib_qp *qp) +{ + put_uobj_read(qp->uobject); +} + +static void put_qp_write(struct ib_qp *qp) +{ + put_uobj_write(qp->uobject); +} + +static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); +} + +static void put_srq_read(struct ib_srq *srq) +{ + put_uobj_read(srq->uobject); +} + +static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, + struct ib_uobject **uobj) +{ + *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0); + return *uobj ? 
(*uobj)->object : NULL; +} + +static void put_xrcd_read(struct ib_uobject *uobj) +{ + put_uobj_read(uobj); +} + +ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_get_context cmd; + struct ib_uverbs_get_context_resp resp; + struct ib_udata udata; + struct ib_ucontext *ucontext; + struct file *filp; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + mutex_lock(&file->mutex); + + if (file->ucontext) { + ret = -EINVAL; + goto err; + } + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + ucontext = ib_dev->alloc_ucontext(ib_dev, &udata); + if (IS_ERR(ucontext)) { + ret = PTR_ERR(ucontext); + goto err; + } + + ucontext->device = ib_dev; + INIT_LIST_HEAD(&ucontext->pd_list); + INIT_LIST_HEAD(&ucontext->mr_list); + INIT_LIST_HEAD(&ucontext->mw_list); + INIT_LIST_HEAD(&ucontext->cq_list); + INIT_LIST_HEAD(&ucontext->qp_list); + INIT_LIST_HEAD(&ucontext->srq_list); + INIT_LIST_HEAD(&ucontext->ah_list); + INIT_LIST_HEAD(&ucontext->wq_list); + INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list); + INIT_LIST_HEAD(&ucontext->xrcd_list); + INIT_LIST_HEAD(&ucontext->rule_list); + rcu_read_lock(); + ucontext->tgid = get_pid(task_pid_group_leader(current)); + rcu_read_unlock(); + ucontext->closing = 0; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + ucontext->umem_tree = RB_ROOT; + init_rwsem(&ucontext->umem_rwsem); + ucontext->odp_mrs_count = 0; + INIT_LIST_HEAD(&ucontext->no_private_counters); + + if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) + ucontext->invalidate_range = NULL; + +#endif + + resp.num_comp_vectors = file->device->num_comp_vectors; + + ret = get_unused_fd_flags(O_CLOEXEC); + if (ret < 0) + goto err_free; + resp.async_fd = ret; + + filp = ib_uverbs_alloc_event_file(file, ib_dev, 1); + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto err_fd; + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_file; + } + + file->ucontext = ucontext; + + fd_install(resp.async_fd, filp); + + mutex_unlock(&file->mutex); + + return in_len; + +err_file: + ib_uverbs_free_async_event_file(file); + fput(filp); + +err_fd: + put_unused_fd(resp.async_fd); + +err_free: + put_pid(ucontext->tgid); + ib_dev->dealloc_ucontext(ucontext); + +err: + mutex_unlock(&file->mutex); + return ret; +} + +static void copy_query_dev_fields(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_uverbs_query_device_resp *resp, + struct ib_device_attr *attr) +{ + resp->fw_ver = attr->fw_ver; + resp->node_guid = ib_dev->node_guid; + resp->sys_image_guid = attr->sys_image_guid; + resp->max_mr_size = attr->max_mr_size; + resp->page_size_cap = attr->page_size_cap; + resp->vendor_id = attr->vendor_id; + resp->vendor_part_id = attr->vendor_part_id; + resp->hw_ver = attr->hw_ver; + resp->max_qp = attr->max_qp; + resp->max_qp_wr = attr->max_qp_wr; + resp->device_cap_flags = (u32)(attr->device_cap_flags); + resp->max_sge = attr->max_sge; + resp->max_sge_rd = attr->max_sge_rd; + resp->max_cq = attr->max_cq; + resp->max_cqe = attr->max_cqe; + resp->max_mr = attr->max_mr; + resp->max_pd = attr->max_pd; + resp->max_qp_rd_atom = attr->max_qp_rd_atom; + resp->max_ee_rd_atom = attr->max_ee_rd_atom; + resp->max_res_rd_atom = attr->max_res_rd_atom; + resp->max_qp_init_rd_atom = 
attr->max_qp_init_rd_atom; + resp->max_ee_init_rd_atom = attr->max_ee_init_rd_atom; + resp->atomic_cap = attr->atomic_cap; + resp->max_ee = attr->max_ee; + resp->max_rdd = attr->max_rdd; + resp->max_mw = attr->max_mw; + resp->max_raw_ipv6_qp = attr->max_raw_ipv6_qp; + resp->max_raw_ethy_qp = attr->max_raw_ethy_qp; + resp->max_mcast_grp = attr->max_mcast_grp; + resp->max_mcast_qp_attach = attr->max_mcast_qp_attach; + resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach; + resp->max_ah = attr->max_ah; + resp->max_fmr = attr->max_fmr; + resp->max_map_per_fmr = attr->max_map_per_fmr; + resp->max_srq = attr->max_srq; + resp->max_srq_wr = attr->max_srq_wr; + resp->max_srq_sge = attr->max_srq_sge; + resp->max_pkeys = attr->max_pkeys; + resp->local_ca_ack_delay = attr->local_ca_ack_delay; + resp->phys_port_cnt = ib_dev->phys_port_cnt; +} + +ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_device cmd; + struct ib_uverbs_query_device_resp resp; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_port cmd; + struct ib_uverbs_query_port_resp resp; + struct ib_port_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = ib_query_port(ib_dev, cmd.port_num, &attr); + if (ret) + return ret; + + memset(&resp, 0, sizeof resp); + + resp.state = attr.state; + resp.max_mtu = attr.max_mtu; + resp.active_mtu = attr.active_mtu; + resp.gid_tbl_len = attr.gid_tbl_len; + resp.port_cap_flags = attr.port_cap_flags; + resp.max_msg_sz = attr.max_msg_sz; + resp.bad_pkey_cntr = attr.bad_pkey_cntr; + resp.qkey_viol_cntr = attr.qkey_viol_cntr; + resp.pkey_tbl_len = attr.pkey_tbl_len; + resp.lid = attr.lid; + resp.sm_lid = attr.sm_lid; + resp.lmc = attr.lmc; + resp.max_vl_num = attr.max_vl_num; + resp.sm_sl = attr.sm_sl; + resp.subnet_timeout = attr.subnet_timeout; + resp.init_type_reply = attr.init_type_reply; + resp.active_width = attr.active_width; + resp.active_speed = attr.active_speed; + resp.phys_state = attr.phys_state; + resp.link_layer = rdma_port_get_link_layer(ib_dev, + cmd.port_num); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_alloc_pd cmd; + struct ib_uverbs_alloc_pd_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_pd *pd; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + init_uobj(uobj, 0, file->ucontext, &pd_lock_class); + down_write(&uobj->mutex); + + pd = ib_dev->alloc_pd(ib_dev, 
file->ucontext, &udata); + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); + goto err; + } + + pd->device = ib_dev; + pd->uobject = uobj; + pd->__internal_mr = NULL; + atomic_set(&pd->usecnt, 0); + + uobj->object = pd; + ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj); + if (ret) + goto err_idr; + + memset(&resp, 0, sizeof resp); + resp.pd_handle = uobj->id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_copy; + } + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->pd_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + + return in_len; + +err_copy: + idr_remove_uobj(&ib_uverbs_pd_idr, uobj); + +err_idr: + ib_dealloc_pd(pd); + +err: + put_uobj_write(uobj); + return ret; +} + +ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_dealloc_pd cmd; + struct ib_uobject *uobj; + struct ib_pd *pd; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); + if (!uobj) + return -EINVAL; + pd = uobj->object; + + if (atomic_read(&pd->usecnt)) { + ret = -EBUSY; + goto err_put; + } + + ret = pd->device->dealloc_pd(uobj->object); + WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); + if (ret) + goto err_put; + + uobj->live = 0; + put_uobj_write(uobj); + + idr_remove_uobj(&ib_uverbs_pd_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + return in_len; + +err_put: + put_uobj_write(uobj); + return ret; +} + +struct xrcd_table_entry { + struct rb_node node; + struct ib_xrcd *xrcd; + struct inode *inode; +}; + +static int xrcd_table_insert(struct ib_uverbs_device *dev, + struct inode *inode, + struct ib_xrcd *xrcd) +{ + struct xrcd_table_entry *entry, *scan; + struct rb_node **p = &dev->xrcd_tree.rb_node; + struct rb_node *parent = NULL; + + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->xrcd = xrcd; + entry->inode = inode; + + while (*p) { + parent = *p; + scan = rb_entry(parent, struct xrcd_table_entry, node); + + if (inode < scan->inode) { + p = &(*p)->rb_left; + } else if (inode > scan->inode) { + p = &(*p)->rb_right; + } else { + kfree(entry); + return -EEXIST; + } + } + + rb_link_node(&entry->node, parent, p); + rb_insert_color(&entry->node, &dev->xrcd_tree); + igrab(inode); + return 0; +} + +static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev, + struct inode *inode) +{ + struct xrcd_table_entry *entry; + struct rb_node *p = dev->xrcd_tree.rb_node; + + while (p) { + entry = rb_entry(p, struct xrcd_table_entry, node); + + if (inode < entry->inode) + p = p->rb_left; + else if (inode > entry->inode) + p = p->rb_right; + else + return entry; + } + + return NULL; +} + +static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode) +{ + struct xrcd_table_entry *entry; + + entry = xrcd_table_search(dev, inode); + if (!entry) + return NULL; + + return entry->xrcd; +} + +static void xrcd_table_delete(struct ib_uverbs_device *dev, + struct inode *inode) +{ + struct xrcd_table_entry *entry; + + entry = xrcd_table_search(dev, inode); + if (entry) { + iput(inode); + rb_erase(&entry->node, &dev->xrcd_tree); + kfree(entry); + } +} + +ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, 
int in_len, + int out_len) +{ + struct ib_uverbs_open_xrcd cmd; + struct ib_uverbs_open_xrcd_resp resp; + struct ib_udata udata; + struct ib_uxrcd_object *obj; + struct ib_xrcd *xrcd = NULL; + struct fd f = {NULL}; + struct inode *inode = NULL; + int ret = 0; + int new_xrcd = 0; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + mutex_lock(&file->device->xrcd_tree_mutex); + + if (cmd.fd != -1) { + /* search for file descriptor */ + f = fdget(cmd.fd); + if (!f.file) { + ret = -EBADF; + goto err_tree_mutex_unlock; + } + + inode = f.file->f_dentry->d_inode; + xrcd = find_xrcd(file->device, inode); + if (!xrcd && !(cmd.oflags & O_CREAT)) { + /* no file descriptor. Need CREATE flag */ + ret = -EAGAIN; + goto err_tree_mutex_unlock; + } + + if (xrcd && cmd.oflags & O_EXCL) { + ret = -EINVAL; + goto err_tree_mutex_unlock; + } + } + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) { + ret = -ENOMEM; + goto err_tree_mutex_unlock; + } + + init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class); + + down_write(&obj->uobject.mutex); + + if (!xrcd) { + xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata); + if (IS_ERR(xrcd)) { + ret = PTR_ERR(xrcd); + goto err; + } + + xrcd->inode = inode; + xrcd->device = ib_dev; + atomic_set(&xrcd->usecnt, 0); + mutex_init(&xrcd->tgt_qp_mutex); + INIT_LIST_HEAD(&xrcd->tgt_qp_list); + new_xrcd = 1; + } + + atomic_set(&obj->refcnt, 0); + obj->uobject.object = xrcd; + ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + if (ret) + goto err_idr; + + memset(&resp, 0, sizeof resp); + resp.xrcd_handle = obj->uobject.id; + + if (inode) { + if (new_xrcd) { + /* create new inode/xrcd table entry */ + ret = xrcd_table_insert(file->device, inode, xrcd); + if (ret) + goto err_insert_xrcd; + } + atomic_inc(&xrcd->usecnt); + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_copy; + } + + if (f.file) + fdput(f); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); + mutex_unlock(&file->mutex); + + obj->uobject.live = 1; + up_write(&obj->uobject.mutex); + + mutex_unlock(&file->device->xrcd_tree_mutex); + return in_len; + +err_copy: + if (inode) { + if (new_xrcd) + xrcd_table_delete(file->device, inode); + atomic_dec(&xrcd->usecnt); + } + +err_insert_xrcd: + idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + +err_idr: + ib_dealloc_xrcd(xrcd); + +err: + put_uobj_write(&obj->uobject); + +err_tree_mutex_unlock: + if (f.file) + fdput(f); + + mutex_unlock(&file->device->xrcd_tree_mutex); + + return ret; +} + +ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_close_xrcd cmd; + struct ib_uobject *uobj; + struct ib_xrcd *xrcd = NULL; + struct inode *inode = NULL; + struct ib_uxrcd_object *obj; + int live; + int ret = 0; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + mutex_lock(&file->device->xrcd_tree_mutex); + uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext); + if (!uobj) { + ret = -EINVAL; + goto out; + } + + xrcd = uobj->object; + inode = xrcd->inode; + obj = container_of(uobj, struct ib_uxrcd_object, uobject); + if (atomic_read(&obj->refcnt)) { + put_uobj_write(uobj); + ret = -EBUSY; + goto out; + } 
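+ + /* + * An XRCD opened through an inode may be shared with other + * processes, so the hardware object is deallocated only when the + * last user is gone (atomic_dec_and_test below). An XRCD without + * an inode is private to this context and is destroyed + * unconditionally. + */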
+ + if (!inode || atomic_dec_and_test(&xrcd->usecnt)) { + ret = ib_dealloc_xrcd(uobj->object); + if (!ret) + uobj->live = 0; + } + + live = uobj->live; + if (inode && ret) + atomic_inc(&xrcd->usecnt); + + put_uobj_write(uobj); + + if (ret) + goto out; + + if (inode && !live) + xrcd_table_delete(file->device, inode); + + idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + ret = in_len; + +out: + mutex_unlock(&file->device->xrcd_tree_mutex); + return ret; +} + +void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, + struct ib_xrcd *xrcd) +{ + struct inode *inode; + + inode = xrcd->inode; + if (inode && !atomic_dec_and_test(&xrcd->usecnt)) + return; + + ib_dealloc_xrcd(xrcd); + + if (inode) + xrcd_table_delete(dev, inode); +} + +ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_reg_mr cmd; + struct ib_uverbs_reg_mr_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_mr *mr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) + return -EINVAL; + + ret = ib_check_mr_access(cmd.access_flags); + if (ret) + return ret; + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + init_uobj(uobj, 0, file->ucontext, &mr_lock_class); + down_write(&uobj->mutex); + + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto err_free; + } + + if (cmd.access_flags & IB_ACCESS_ON_DEMAND) { + if (!(pd->device->attrs.device_cap_flags & + IB_DEVICE_ON_DEMAND_PAGING)) { + pr_debug("ODP support not available\n"); + ret = -EINVAL; + goto err_put; + } + } + + mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, + cmd.access_flags, &udata); + if (IS_ERR(mr)) { + ret = PTR_ERR(mr); + goto err_put; + } + + mr->device = pd->device; + mr->pd = pd; + mr->uobject = uobj; + atomic_inc(&pd->usecnt); + + uobj->object = mr; + ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); + if (ret) + goto err_unreg; + + memset(&resp, 0, sizeof resp); + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; + resp.mr_handle = uobj->id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_copy; + } + + put_pd_read(pd); + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->mr_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + + return in_len; + +err_copy: + idr_remove_uobj(&ib_uverbs_mr_idr, uobj); + +err_unreg: + ib_dereg_mr(mr); + +err_put: + put_pd_read(pd); + +err_free: + put_uobj_write(uobj); + return ret; +} + +ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_rereg_mr cmd; + struct ib_uverbs_rereg_mr_resp resp; + struct ib_udata udata; + struct ib_pd *pd = NULL; + struct ib_mr *mr; + struct ib_pd *old_pd; + int ret; + struct ib_uobject *uobj; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), 
+ in_len - sizeof(cmd), out_len - sizeof(resp)); + + if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) + return -EINVAL; + + if ((cmd.flags & IB_MR_REREG_TRANS) && + (!cmd.start || !cmd.hca_va || 0 >= cmd.length || + (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) + return -EINVAL; + + uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, + file->ucontext); + + if (!uobj) + return -EINVAL; + + mr = uobj->object; + + if (cmd.flags & IB_MR_REREG_ACCESS) { + ret = ib_check_mr_access(cmd.access_flags); + if (ret) + goto put_uobjs; + } + + if (cmd.flags & IB_MR_REREG_PD) { + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto put_uobjs; + } + } + + old_pd = mr->pd; + ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start, + cmd.length, cmd.hca_va, + cmd.access_flags, pd, &udata); + if (!ret) { + if (cmd.flags & IB_MR_REREG_PD) { + atomic_inc(&pd->usecnt); + mr->pd = pd; + atomic_dec(&old_pd->usecnt); + } + } else { + goto put_uobj_pd; + } + + memset(&resp, 0, sizeof(resp)); + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + else + ret = in_len; + +put_uobj_pd: + if (cmd.flags & IB_MR_REREG_PD) + put_pd_read(pd); + +put_uobjs: + + put_uobj_write(mr->uobject); + + return ret; +} + +ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_dereg_mr cmd; + struct ib_mr *mr; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext); + if (!uobj) + return -EINVAL; + + mr = uobj->object; + + ret = ib_dereg_mr(mr); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_mr_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + return in_len; +} + +ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_alloc_mw cmd; + struct ib_uverbs_alloc_mw_resp resp; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_mw *mw; + struct ib_udata udata; + int ret; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + init_uobj(uobj, 0, file->ucontext, &mw_lock_class); + down_write(&uobj->mutex); + + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto err_free; + } + + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long)cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); + + mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata); + if (IS_ERR(mw)) { + ret = PTR_ERR(mw); + goto err_put; + } + + mw->device = pd->device; + mw->pd = pd; + mw->uobject = uobj; + atomic_inc(&pd->usecnt); + + uobj->object = mw; + ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj); + if (ret) + goto err_unalloc; + + memset(&resp, 0, sizeof(resp)); + resp.rkey = mw->rkey; + resp.mw_handle = uobj->id; + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) { + ret = -EFAULT; + goto err_copy; + } + + put_pd_read(pd); + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, 
&file->ucontext->mw_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + + return in_len; + +err_copy: + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + +err_unalloc: + uverbs_dealloc_mw(mw); + +err_put: + put_pd_read(pd); + +err_free: + put_uobj_write(uobj); + return ret; +} + +ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_dealloc_mw cmd; + struct ib_mw *mw; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext); + if (!uobj) + return -EINVAL; + + mw = uobj->object; + + ret = uverbs_dealloc_mw(mw); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + return in_len; +} + +ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_comp_channel cmd; + struct ib_uverbs_create_comp_channel_resp resp; + struct file *filp; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = get_unused_fd_flags(O_CLOEXEC); + if (ret < 0) + return ret; + resp.fd = ret; + + filp = ib_uverbs_alloc_event_file(file, ib_dev, 0); + if (IS_ERR(filp)) { + put_unused_fd(resp.fd); + return PTR_ERR(filp); + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + put_unused_fd(resp.fd); + fput(filp); + return -EFAULT; + } + + fd_install(resp.fd, filp); + return in_len; +} + +static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw, + struct ib_uverbs_ex_create_cq *cmd, + size_t cmd_sz, + int (*cb)(struct ib_uverbs_file *file, + struct ib_ucq_object *obj, + struct ib_uverbs_ex_create_cq_resp *resp, + struct ib_udata *udata, + void *context), + void *context) +{ + struct ib_ucq_object *obj; + struct ib_uverbs_event_file *ev_file = NULL; + struct ib_cq *cq; + int ret; + struct ib_uverbs_ex_create_cq_resp resp; + struct ib_cq_init_attr attr = {}; + + if (cmd->comp_vector >= file->device->num_comp_vectors) + return ERR_PTR(-EINVAL); + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return ERR_PTR(-ENOMEM); + + init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class); + down_write(&obj->uobject.mutex); + + if (cmd->comp_channel >= 0) { + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel); + if (!ev_file) { + ret = -EINVAL; + goto err; + } + } + + obj->uverbs_file = file; + obj->comp_events_reported = 0; + obj->async_events_reported = 0; + INIT_LIST_HEAD(&obj->comp_list); + INIT_LIST_HEAD(&obj->async_list); + + attr.cqe = cmd->cqe; + attr.comp_vector = cmd->comp_vector; + + if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) + attr.flags = cmd->flags; + + cq = ib_dev->create_cq(ib_dev, &attr, + file->ucontext, uhw); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto err_file; + } + + cq->device = ib_dev; + cq->uobject = &obj->uobject; + cq->comp_handler = ib_uverbs_comp_handler; + cq->event_handler = ib_uverbs_cq_event_handler; + cq->cq_context = ev_file; + atomic_set(&cq->usecnt, 0); + + obj->uobject.object = cq; 
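+ /* + * The idr slot allocated by idr_add_uobj() below becomes the + * cq_handle returned to userspace (resp.base.cq_handle); subsequent + * verbs calls look the CQ up again by that handle. + */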
+ ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject); + if (ret) + goto err_free; + + memset(&resp, 0, sizeof resp); + resp.base.cq_handle = obj->uobject.id; + resp.base.cqe = cq->cqe; + + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + + ret = cb(file, obj, &resp, ucore, context); + if (ret) + goto err_cb; + + mutex_lock(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); + mutex_unlock(&file->mutex); + + obj->uobject.live = 1; + + up_write(&obj->uobject.mutex); + + return obj; + +err_cb: + idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); + +err_free: + ib_destroy_cq(cq); + +err_file: + if (ev_file) + ib_uverbs_release_ucq(file, ev_file, obj); + +err: + put_uobj_write(&obj->uobject); + + return ERR_PTR(ret); +} + +static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file, + struct ib_ucq_object *obj, + struct ib_uverbs_ex_create_cq_resp *resp, + struct ib_udata *ucore, void *context) +{ + if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) + return -EFAULT; + + return 0; +} + +ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_cq cmd; + struct ib_uverbs_ex_create_cq cmd_ex; + struct ib_uverbs_create_cq_resp resp; + struct ib_udata ucore; + struct ib_udata uhw; + struct ib_ucq_object *obj; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp)); + + INIT_UDATA(&uhw, buf + sizeof(cmd), + (unsigned long)cmd.response + sizeof(resp), + in_len - sizeof(cmd), out_len - sizeof(resp)); + + memset(&cmd_ex, 0, sizeof(cmd_ex)); + cmd_ex.user_handle = cmd.user_handle; + cmd_ex.cqe = cmd.cqe; + cmd_ex.comp_vector = cmd.comp_vector; + cmd_ex.comp_channel = cmd.comp_channel; + + obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex, + offsetof(typeof(cmd_ex), comp_channel) + + sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, + NULL); + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + return in_len; +} + +static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file, + struct ib_ucq_object *obj, + struct ib_uverbs_ex_create_cq_resp *resp, + struct ib_udata *ucore, void *context) +{ + if (ib_copy_to_udata(ucore, resp, resp->response_length)) + return -EFAULT; + + return 0; +} + +int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_cq_resp resp; + struct ib_uverbs_ex_create_cq cmd; + struct ib_ucq_object *obj; + int err; + + if (ucore->inlen < sizeof(cmd)) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + if (err) + return err; + + if (cmd.comp_mask) + return -EINVAL; + + if (cmd.reserved) + return -EINVAL; + + if (ucore->outlen < (offsetof(typeof(resp), response_length) + + sizeof(resp.response_length))) + return -ENOSPC; + + obj = create_cq(file, ib_dev, ucore, uhw, &cmd, + min(ucore->inlen, sizeof(cmd)), + ib_uverbs_ex_create_cq_cb, NULL); + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + return 0; +} + +ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_resize_cq cmd; + struct ib_uverbs_resize_cq_resp resp; + struct ib_udata udata; + struct ib_cq *cq; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof 
cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) + return -EINVAL; + + ret = cq->device->resize_cq(cq, cmd.cqe, &udata); + if (ret) + goto out; + + resp.cqe = cq->cqe; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp.cqe)) + ret = -EFAULT; + +out: + put_cq_read(cq); + + return ret ? ret : in_len; +} + +static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) +{ + struct ib_uverbs_wc tmp; + + tmp.wr_id = wc->wr_id; + tmp.status = wc->status; + tmp.opcode = wc->opcode; + tmp.vendor_err = wc->vendor_err; + tmp.byte_len = wc->byte_len; + tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data; + tmp.qp_num = wc->qp->qp_num; + tmp.src_qp = wc->src_qp; + tmp.wc_flags = wc->wc_flags; + tmp.pkey_index = wc->pkey_index; + tmp.slid = wc->slid; + tmp.sl = wc->sl; + tmp.dlid_path_bits = wc->dlid_path_bits; + tmp.port_num = wc->port_num; + tmp.reserved = 0; + + if (copy_to_user(dest, &tmp, sizeof tmp)) + return -EFAULT; + + return 0; +} + +ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_poll_cq cmd; + struct ib_uverbs_poll_cq_resp resp; + u8 __user *header_ptr; + u8 __user *data_ptr; + struct ib_cq *cq; + struct ib_wc wc; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) + return -EINVAL; + + /* we copy a struct ib_uverbs_poll_cq_resp to user space */ + header_ptr = (void __user *)(unsigned long) cmd.response; + data_ptr = header_ptr + sizeof resp; + + memset(&resp, 0, sizeof resp); + while (resp.count < cmd.ne) { + ret = ib_poll_cq(cq, 1, &wc); + if (ret < 0) + goto out_put; + if (!ret) + break; + + ret = copy_wc_to_user(data_ptr, &wc); + if (ret) + goto out_put; + + data_ptr += sizeof(struct ib_uverbs_wc); + ++resp.count; + } + + if (copy_to_user(header_ptr, &resp, sizeof resp)) { + ret = -EFAULT; + goto out_put; + } + + ret = in_len; + +out_put: + put_cq_read(cq); + return ret; +} + +ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_req_notify_cq cmd; + struct ib_cq *cq; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) + return -EINVAL; + + ib_req_notify_cq(cq, cmd.solicited_only ? 
+ IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); + + put_cq_read(cq); + + return in_len; +} + +ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_cq cmd; + struct ib_uverbs_destroy_cq_resp resp; + struct ib_uobject *uobj; + struct ib_cq *cq; + struct ib_ucq_object *obj; + struct ib_uverbs_event_file *ev_file; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); + if (!uobj) + return -EINVAL; + cq = uobj->object; + ev_file = cq->cq_context; + obj = container_of(cq->uobject, struct ib_ucq_object, uobject); + + ret = ib_destroy_cq(cq); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_cq_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + ib_uverbs_release_ucq(file, ev_file, obj); + + memset(&resp, 0, sizeof resp); + resp.comp_events_reported = obj->comp_events_reported; + resp.async_events_reported = obj->async_events_reported; + + put_uobj(uobj); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +static int create_qp(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw, + struct ib_uverbs_ex_create_qp *cmd, + size_t cmd_sz, + int (*cb)(struct ib_uverbs_file *file, + struct ib_uverbs_ex_create_qp_resp *resp, + struct ib_udata *udata), + void *context) +{ + struct ib_uqp_object *obj; + struct ib_device *device; + struct ib_pd *pd = NULL; + struct ib_xrcd *xrcd = NULL; + struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_cq *scq = NULL, *rcq = NULL; + struct ib_srq *srq = NULL; + struct ib_qp *qp; + char *buf; + struct ib_qp_init_attr attr = {}; + struct ib_uverbs_ex_create_qp_resp resp; + int ret; + struct ib_rwq_ind_table *ind_tbl = NULL; + bool has_sq = true; + + if (cmd->qp_type == IB_QPT_RAW_PACKET && priv_check(curthread, PRIV_NET_RAW) != 0) + return -EPERM; + + obj = kzalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, + &qp_lock_class); + down_write(&obj->uevent.uobject.mutex); + if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + + sizeof(cmd->rwq_ind_tbl_handle) && + (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { + ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle, + file->ucontext); + if (!ind_tbl) { + ret = -EINVAL; + goto err_put; + } + + attr.rwq_ind_tbl = ind_tbl; + } + + if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) + + sizeof(cmd->reserved1)) && cmd->reserved1) { + ret = -EOPNOTSUPP; + goto err_put; + } + + if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) { + ret = -EINVAL; + goto err_put; + } + + if (ind_tbl && !cmd->max_send_wr) + has_sq = false; + + if (cmd->qp_type == IB_QPT_XRC_TGT) { + xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, + &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_put; + } + device = xrcd->device; + } else { + if (cmd->qp_type == IB_QPT_XRC_INI) { + cmd->max_recv_wr = 0; + cmd->max_recv_sge = 0; + } else { + if (cmd->is_srq) { + srq = idr_read_srq(cmd->srq_handle, + file->ucontext); + if (!srq || srq->srq_type != IB_SRQT_BASIC) { + ret = -EINVAL; + goto err_put; + } + } + + if (!ind_tbl) { + if (cmd->recv_cq_handle != cmd->send_cq_handle) { + rcq = 
idr_read_cq(cmd->recv_cq_handle, + file->ucontext, 0); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } + } + } + } + + if (has_sq) + scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); + if (!ind_tbl) + rcq = rcq ?: scq; + pd = idr_read_pd(cmd->pd_handle, file->ucontext); + if (!pd || (!scq && has_sq)) { + ret = -EINVAL; + goto err_put; + } + + device = pd->device; + } + + attr.event_handler = ib_uverbs_qp_event_handler; + attr.qp_context = file; + attr.send_cq = scq; + attr.recv_cq = rcq; + attr.srq = srq; + attr.xrcd = xrcd; + attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR : + IB_SIGNAL_REQ_WR; + attr.qp_type = cmd->qp_type; + attr.create_flags = 0; + + attr.cap.max_send_wr = cmd->max_send_wr; + attr.cap.max_recv_wr = cmd->max_recv_wr; + attr.cap.max_send_sge = cmd->max_send_sge; + attr.cap.max_recv_sge = cmd->max_recv_sge; + attr.cap.max_inline_data = cmd->max_inline_data; + + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + INIT_LIST_HEAD(&obj->mcast_list); + + if (cmd_sz >= offsetof(typeof(*cmd), create_flags) + + sizeof(cmd->create_flags)) + attr.create_flags = cmd->create_flags; + + if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | + IB_QP_CREATE_CROSS_CHANNEL | + IB_QP_CREATE_MANAGED_SEND | + IB_QP_CREATE_MANAGED_RECV | + IB_QP_CREATE_SCATTER_FCS)) { + ret = -EINVAL; + goto err_put; + } + + buf = (char *)cmd + sizeof(*cmd); + if (cmd_sz > sizeof(*cmd)) + if (!(buf[0] == 0 && !memcmp(buf, buf + 1, + cmd_sz - sizeof(*cmd) - 1))) { + ret = -EINVAL; + goto err_put; + } + + if (cmd->qp_type == IB_QPT_XRC_TGT) + qp = ib_create_qp(pd, &attr); + else + qp = device->create_qp(pd, &attr, uhw); + + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_put; + } + + if (cmd->qp_type != IB_QPT_XRC_TGT) { + qp->real_qp = qp; + qp->device = device; + qp->pd = pd; + qp->send_cq = attr.send_cq; + qp->recv_cq = attr.recv_cq; + qp->srq = attr.srq; + qp->rwq_ind_tbl = ind_tbl; + qp->event_handler = attr.event_handler; + qp->qp_context = attr.qp_context; + qp->qp_type = attr.qp_type; + atomic_set(&qp->usecnt, 0); + atomic_inc(&pd->usecnt); + if (attr.send_cq) + atomic_inc(&attr.send_cq->usecnt); + if (attr.recv_cq) + atomic_inc(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_inc(&attr.srq->usecnt); + if (ind_tbl) + atomic_inc(&ind_tbl->usecnt); + } + qp->uobject = &obj->uevent.uobject; + + obj->uevent.uobject.object = qp; + ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + if (ret) + goto err_destroy; + + memset(&resp, 0, sizeof resp); + resp.base.qpn = qp->qp_num; + resp.base.qp_handle = obj->uevent.uobject.id; + resp.base.max_recv_sge = attr.cap.max_recv_sge; + resp.base.max_send_sge = attr.cap.max_send_sge; + resp.base.max_recv_wr = attr.cap.max_recv_wr; + resp.base.max_send_wr = attr.cap.max_send_wr; + resp.base.max_inline_data = attr.cap.max_inline_data; + + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + + ret = cb(file, &resp, ucore); + if (ret) + goto err_cb; + + if (xrcd) { + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, + uobject); + atomic_inc(&obj->uxrcd->refcnt); + put_xrcd_read(xrcd_uobj); + } + + if (pd) + put_pd_read(pd); + if (scq) + put_cq_read(scq); + if (rcq && rcq != scq) + put_cq_read(rcq); + if (srq) + put_srq_read(srq); + if (ind_tbl) + put_rwq_indirection_table_read(ind_tbl); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); + mutex_unlock(&file->mutex); + + obj->uevent.uobject.live = 1; + 
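+	/*
+	 * Setup is complete: the QP is in the IDR, all borrowed object
+	 * references taken above have been dropped, and the uobject is
+	 * live on the context's qp_list, so release the creation lock
+	 * before returning success.
+	 */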
+ up_write(&obj->uevent.uobject.mutex); + + return 0; +err_cb: + idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + +err_destroy: + ib_destroy_qp(qp); + +err_put: + if (xrcd) + put_xrcd_read(xrcd_uobj); + if (pd) + put_pd_read(pd); + if (scq) + put_cq_read(scq); + if (rcq && rcq != scq) + put_cq_read(rcq); + if (srq) + put_srq_read(srq); + if (ind_tbl) + put_rwq_indirection_table_read(ind_tbl); + + put_uobj_write(&obj->uevent.uobject); + return ret; +} + +static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file, + struct ib_uverbs_ex_create_qp_resp *resp, + struct ib_udata *ucore) +{ + if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) + return -EFAULT; + + return 0; +} + +ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_qp cmd; + struct ib_uverbs_ex_create_qp cmd_ex; + struct ib_udata ucore; + struct ib_udata uhw; + ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp); + int err; + + if (out_len < resp_size) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), + resp_size); + INIT_UDATA(&uhw, buf + sizeof(cmd), + (unsigned long)cmd.response + resp_size, + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - resp_size); + + memset(&cmd_ex, 0, sizeof(cmd_ex)); + cmd_ex.user_handle = cmd.user_handle; + cmd_ex.pd_handle = cmd.pd_handle; + cmd_ex.send_cq_handle = cmd.send_cq_handle; + cmd_ex.recv_cq_handle = cmd.recv_cq_handle; + cmd_ex.srq_handle = cmd.srq_handle; + cmd_ex.max_send_wr = cmd.max_send_wr; + cmd_ex.max_recv_wr = cmd.max_recv_wr; + cmd_ex.max_send_sge = cmd.max_send_sge; + cmd_ex.max_recv_sge = cmd.max_recv_sge; + cmd_ex.max_inline_data = cmd.max_inline_data; + cmd_ex.sq_sig_all = cmd.sq_sig_all; + cmd_ex.qp_type = cmd.qp_type; + cmd_ex.is_srq = cmd.is_srq; + + err = create_qp(file, &ucore, &uhw, &cmd_ex, + offsetof(typeof(cmd_ex), is_srq) + + sizeof(cmd.is_srq), ib_uverbs_create_qp_cb, + NULL); + + if (err) + return err; + + return in_len; +} + +static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file, + struct ib_uverbs_ex_create_qp_resp *resp, + struct ib_udata *ucore) +{ + if (ib_copy_to_udata(ucore, resp, resp->response_length)) + return -EFAULT; + + return 0; +} + +int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_qp_resp resp; + struct ib_uverbs_ex_create_qp cmd = {0}; + int err; + + if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) + + sizeof(cmd.comp_mask))) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (err) + return err; + + if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK) + return -EINVAL; + + if (cmd.reserved) + return -EINVAL; + + if (ucore->outlen < (offsetof(typeof(resp), response_length) + + sizeof(resp.response_length))) + return -ENOSPC; + + err = create_qp(file, ucore, uhw, &cmd, + min(ucore->inlen, sizeof(cmd)), + ib_uverbs_ex_create_qp_cb, NULL); + + if (err) + return err; + + return 0; +} + +ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_open_qp cmd; + struct ib_uverbs_create_qp_resp resp; + struct ib_udata udata; + struct ib_uqp_object *obj; + struct ib_xrcd *xrcd; + struct ib_uobject 
*uninitialized_var(xrcd_uobj); + struct ib_qp *qp; + struct ib_qp_open_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); + down_write(&obj->uevent.uobject.mutex); + + xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_put; + } + + attr.event_handler = ib_uverbs_qp_event_handler; + attr.qp_context = file; + attr.qp_num = cmd.qpn; + attr.qp_type = cmd.qp_type; + + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + INIT_LIST_HEAD(&obj->mcast_list); + + qp = ib_open_qp(xrcd, &attr); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_put; + } + + qp->uobject = &obj->uevent.uobject; + + obj->uevent.uobject.object = qp; + ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + if (ret) + goto err_destroy; + + memset(&resp, 0, sizeof resp); + resp.qpn = qp->qp_num; + resp.qp_handle = obj->uevent.uobject.id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_remove; + } + + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); + atomic_inc(&obj->uxrcd->refcnt); + put_xrcd_read(xrcd_uobj); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); + mutex_unlock(&file->mutex); + + obj->uevent.uobject.live = 1; + + up_write(&obj->uevent.uobject.mutex); + + return in_len; + +err_remove: + idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + +err_destroy: + ib_destroy_qp(qp); + +err_put: + put_xrcd_read(xrcd_uobj); + put_uobj_write(&obj->uevent.uobject); + return ret; +} + +ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_query_qp cmd; + struct ib_uverbs_query_qp_resp resp; + struct ib_qp *qp; + struct ib_qp_attr *attr; + struct ib_qp_init_attr *init_attr; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); + if (!attr || !init_attr) { + ret = -ENOMEM; + goto out; + } + + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) { + ret = -EINVAL; + goto out; + } + + ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); + + put_qp_read(qp); + + if (ret) + goto out; + + memset(&resp, 0, sizeof resp); + + resp.qp_state = attr->qp_state; + resp.cur_qp_state = attr->cur_qp_state; + resp.path_mtu = attr->path_mtu; + resp.path_mig_state = attr->path_mig_state; + resp.qkey = attr->qkey; + resp.rq_psn = attr->rq_psn; + resp.sq_psn = attr->sq_psn; + resp.dest_qp_num = attr->dest_qp_num; + resp.qp_access_flags = attr->qp_access_flags; + resp.pkey_index = attr->pkey_index; + resp.alt_pkey_index = attr->alt_pkey_index; + resp.sq_draining = attr->sq_draining; + resp.max_rd_atomic = attr->max_rd_atomic; + resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; + resp.min_rnr_timer = attr->min_rnr_timer; + resp.port_num = attr->port_num; + resp.timeout = attr->timeout; + resp.retry_cnt = attr->retry_cnt; + resp.rnr_retry = attr->rnr_retry; + resp.alt_port_num = attr->alt_port_num; + 
resp.alt_timeout = attr->alt_timeout; + + memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); + resp.dest.flow_label = attr->ah_attr.grh.flow_label; + resp.dest.sgid_index = attr->ah_attr.grh.sgid_index; + resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; + resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; + resp.dest.dlid = attr->ah_attr.dlid; + resp.dest.sl = attr->ah_attr.sl; + resp.dest.src_path_bits = attr->ah_attr.src_path_bits; + resp.dest.static_rate = attr->ah_attr.static_rate; + resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH); + resp.dest.port_num = attr->ah_attr.port_num; + + memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); + resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; + resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; + resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; + resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; + resp.alt_dest.dlid = attr->alt_ah_attr.dlid; + resp.alt_dest.sl = attr->alt_ah_attr.sl; + resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; + resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; + resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); + resp.alt_dest.port_num = attr->alt_ah_attr.port_num; + + resp.max_send_wr = init_attr->cap.max_send_wr; + resp.max_recv_wr = init_attr->cap.max_recv_wr; + resp.max_send_sge = init_attr->cap.max_send_sge; + resp.max_recv_sge = init_attr->cap.max_recv_sge; + resp.max_inline_data = init_attr->cap.max_inline_data; + resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + kfree(attr); + kfree(init_attr); + + return ret ? ret : in_len; +} + +/* Remove ignored fields set in the attribute mask */ +static int modify_qp_mask(enum ib_qp_type qp_type, int mask) +{ + switch (qp_type) { + case IB_QPT_XRC_INI: + return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER); + case IB_QPT_XRC_TGT: + return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY); + default: + return mask; + } +} + +ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_modify_qp cmd; + struct ib_udata udata; + struct ib_qp *qp; + struct ib_qp_attr *attr; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, + out_len); + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) + return -ENOMEM; + + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) { + ret = -EINVAL; + goto out; + } + + attr->qp_state = cmd.qp_state; + attr->cur_qp_state = cmd.cur_qp_state; + attr->path_mtu = cmd.path_mtu; + attr->path_mig_state = cmd.path_mig_state; + attr->qkey = cmd.qkey; + attr->rq_psn = cmd.rq_psn; + attr->sq_psn = cmd.sq_psn; + attr->dest_qp_num = cmd.dest_qp_num; + attr->qp_access_flags = cmd.qp_access_flags; + attr->pkey_index = cmd.pkey_index; + attr->alt_pkey_index = cmd.alt_pkey_index; + attr->en_sqd_async_notify = cmd.en_sqd_async_notify; + attr->max_rd_atomic = cmd.max_rd_atomic; + attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; + attr->min_rnr_timer = cmd.min_rnr_timer; + attr->port_num = cmd.port_num; + attr->timeout = cmd.timeout; + attr->retry_cnt = cmd.retry_cnt; + attr->rnr_retry = cmd.rnr_retry; + attr->alt_port_num = cmd.alt_port_num; + attr->alt_timeout = 
cmd.alt_timeout; + + memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); + attr->ah_attr.grh.flow_label = cmd.dest.flow_label; + attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; + attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; + attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; + attr->ah_attr.dlid = cmd.dest.dlid; + attr->ah_attr.sl = cmd.dest.sl; + attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; + attr->ah_attr.static_rate = cmd.dest.static_rate; + attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; + attr->ah_attr.port_num = cmd.dest.port_num; + + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); + attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; + attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; + attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; + attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; + attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; + attr->alt_ah_attr.sl = cmd.alt_dest.sl; + attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; + attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; + attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; + attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; + + if (qp->real_qp == qp) { + ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask); + if (ret) + goto release_qp; + ret = qp->device->modify_qp(qp, attr, + modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); + } else { + ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); + } + + if (ret) + goto release_qp; + + ret = in_len; + +release_qp: + put_qp_read(qp); + +out: + kfree(attr); + + return ret; +} + +ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_qp cmd; + struct ib_uverbs_destroy_qp_resp resp; + struct ib_uobject *uobj; + struct ib_qp *qp; + struct ib_uqp_object *obj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext); + if (!uobj) + return -EINVAL; + qp = uobj->object; + obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); + + if (!list_empty(&obj->mcast_list)) { + put_uobj_write(uobj); + return -EBUSY; + } + + ret = ib_destroy_qp(qp); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + if (obj->uxrcd) + atomic_dec(&obj->uxrcd->refcnt); + + idr_remove_uobj(&ib_uverbs_qp_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + ib_uverbs_release_uevent(file, &obj->uevent); + + resp.events_reported = obj->uevent.events_reported; + + put_uobj(uobj); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +static void *alloc_wr(size_t wr_size, __u32 num_sge) +{ + return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) + + num_sge * sizeof (struct ib_sge), GFP_KERNEL); +}; + +ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_send cmd; + struct ib_uverbs_post_send_resp resp; + struct ib_uverbs_send_wr *user_wr; + struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; + struct ib_qp *qp; + int i, sg_ind; + int is_ud; + ssize_t ret = -EINVAL; + size_t next_size; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return 
-EFAULT; + + if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + + cmd.sge_count * sizeof (struct ib_uverbs_sge)) + return -EINVAL; + + if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) + return -EINVAL; + + user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); + if (!user_wr) + return -ENOMEM; + + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) + goto out; + + is_ud = qp->qp_type == IB_QPT_UD; + sg_ind = 0; + last = NULL; + for (i = 0; i < cmd.wr_count; ++i) { + if (copy_from_user(user_wr, + buf + sizeof cmd + i * cmd.wqe_size, + cmd.wqe_size)) { + ret = -EFAULT; + goto out_put; + } + + if (user_wr->num_sge + sg_ind > cmd.sge_count) { + ret = -EINVAL; + goto out_put; + } + + if (is_ud) { + struct ib_ud_wr *ud; + + if (user_wr->opcode != IB_WR_SEND && + user_wr->opcode != IB_WR_SEND_WITH_IMM) { + ret = -EINVAL; + goto out_put; + } + + next_size = sizeof(*ud); + ud = alloc_wr(next_size, user_wr->num_sge); + if (!ud) { + ret = -ENOMEM; + goto out_put; + } + + ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); + if (!ud->ah) { + kfree(ud); + ret = -EINVAL; + goto out_put; + } + ud->remote_qpn = user_wr->wr.ud.remote_qpn; + ud->remote_qkey = user_wr->wr.ud.remote_qkey; + + next = &ud->wr; + } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + user_wr->opcode == IB_WR_RDMA_WRITE || + user_wr->opcode == IB_WR_RDMA_READ) { + struct ib_rdma_wr *rdma; + + next_size = sizeof(*rdma); + rdma = alloc_wr(next_size, user_wr->num_sge); + if (!rdma) { + ret = -ENOMEM; + goto out_put; + } + + rdma->remote_addr = user_wr->wr.rdma.remote_addr; + rdma->rkey = user_wr->wr.rdma.rkey; + + next = &rdma->wr; + } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { + struct ib_atomic_wr *atomic; + + next_size = sizeof(*atomic); + atomic = alloc_wr(next_size, user_wr->num_sge); + if (!atomic) { + ret = -ENOMEM; + goto out_put; + } + + atomic->remote_addr = user_wr->wr.atomic.remote_addr; + atomic->compare_add = user_wr->wr.atomic.compare_add; + atomic->swap = user_wr->wr.atomic.swap; + atomic->rkey = user_wr->wr.atomic.rkey; + + next = &atomic->wr; + } else if (user_wr->opcode == IB_WR_SEND || + user_wr->opcode == IB_WR_SEND_WITH_IMM || + user_wr->opcode == IB_WR_SEND_WITH_INV) { + next_size = sizeof(*next); + next = alloc_wr(next_size, user_wr->num_sge); + if (!next) { + ret = -ENOMEM; + goto out_put; + } + } else { + ret = -EINVAL; + goto out_put; + } + + if (user_wr->opcode == IB_WR_SEND_WITH_IMM || + user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + next->ex.imm_data = + (__be32 __force) user_wr->ex.imm_data; + } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) { + next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + next->opcode = user_wr->opcode; + next->send_flags = user_wr->send_flags; + + if (next->num_sge) { + next->sg_list = (void *)((char *)next + + ALIGN(next_size, sizeof(struct ib_sge))); + if (copy_from_user(next->sg_list, + (const char *)buf + sizeof cmd + + cmd.wr_count * cmd.wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto out_put; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + resp.bad_wr = 0; + ret = qp->device->post_send(qp->real_qp, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + if 
(copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out_put: + put_qp_read(qp); + + while (wr) { + if (is_ud && ud_wr(wr)->ah) + put_ah_read(ud_wr(wr)->ah); + next = wr->next; + kfree(wr); + wr = next; + } + +out: + kfree(user_wr); + + return ret ? ret : in_len; +} + +static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, + int in_len, + u32 wr_count, + u32 sge_count, + u32 wqe_size) +{ + struct ib_uverbs_recv_wr *user_wr; + struct ib_recv_wr *wr = NULL, *last, *next; + int sg_ind; + int i; + int ret; + + if (in_len < wqe_size * wr_count + + sge_count * sizeof (struct ib_uverbs_sge)) + return ERR_PTR(-EINVAL); + + if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) + return ERR_PTR(-EINVAL); + + user_wr = kmalloc(wqe_size, GFP_KERNEL); + if (!user_wr) + return ERR_PTR(-ENOMEM); + + sg_ind = 0; + last = NULL; + for (i = 0; i < wr_count; ++i) { + if (copy_from_user(user_wr, buf + i * wqe_size, + wqe_size)) { + ret = -EFAULT; + goto err; + } + + if (user_wr->num_sge + sg_ind > sge_count) { + ret = -EINVAL; + goto err; + } + + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto err; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + + if (next->num_sge) { + next->sg_list = (void *)((char *)next + + ALIGN(sizeof *next, sizeof (struct ib_sge))); + if (copy_from_user(next->sg_list, + (const char *)buf + wr_count * wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto err; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + kfree(user_wr); + return wr; + +err: + kfree(user_wr); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ERR_PTR(ret); +} + +ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_recv cmd; + struct ib_uverbs_post_recv_resp resp; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_qp *qp; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) + goto out; + + resp.bad_wr = 0; + ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); + + put_qp_read(qp); + + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ret ? 
ret : in_len; +} + +ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_srq_recv cmd; + struct ib_uverbs_post_srq_recv_resp resp; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_srq *srq; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq) + goto out; + + resp.bad_wr = 0; + ret = srq->device->post_srq_recv(srq, wr, &bad_wr); + + put_srq_read(srq); + + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_ah cmd; + struct ib_uverbs_create_ah_resp resp; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_ah *ah; + struct ib_ah_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); + down_write(&uobj->mutex); + + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto err; + } + + attr.dlid = cmd.attr.dlid; + attr.sl = cmd.attr.sl; + attr.src_path_bits = cmd.attr.src_path_bits; + attr.static_rate = cmd.attr.static_rate; + attr.ah_flags = cmd.attr.is_global ? 
IB_AH_GRH : 0; + attr.port_num = cmd.attr.port_num; + attr.grh.flow_label = cmd.attr.grh.flow_label; + attr.grh.sgid_index = cmd.attr.grh.sgid_index; + attr.grh.hop_limit = cmd.attr.grh.hop_limit; + attr.grh.traffic_class = cmd.attr.grh.traffic_class; + memset(&attr.dmac, 0, sizeof(attr.dmac)); + memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); + + ah = ib_create_ah(pd, &attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + goto err_put; + } + + ah->uobject = uobj; + uobj->object = ah; + + ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj); + if (ret) + goto err_destroy; + + resp.ah_handle = uobj->id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_copy; + } + + put_pd_read(pd); + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->ah_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + + return in_len; + +err_copy: + idr_remove_uobj(&ib_uverbs_ah_idr, uobj); + +err_destroy: + ib_destroy_ah(ah); + +err_put: + put_pd_read(pd); + +err: + put_uobj_write(uobj); + return ret; +} + +ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_destroy_ah cmd; + struct ib_ah *ah; + struct ib_uobject *uobj; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext); + if (!uobj) + return -EINVAL; + ah = uobj->object; + + ret = ib_destroy_ah(ah); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_ah_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + return in_len; +} + +ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_attach_mcast cmd; + struct ib_qp *qp; + struct ib_uqp_object *obj; + struct ib_uverbs_mcast_entry *mcast; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + qp = idr_write_qp(cmd.qp_handle, file->ucontext); + if (!qp) + return -EINVAL; + + obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + + list_for_each_entry(mcast, &obj->mcast_list, list) + if (cmd.mlid == mcast->lid && + !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { + ret = 0; + goto out_put; + } + + mcast = kmalloc(sizeof *mcast, GFP_KERNEL); + if (!mcast) { + ret = -ENOMEM; + goto out_put; + } + + mcast->lid = cmd.mlid; + memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw); + + ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid); + if (!ret) + list_add_tail(&mcast->list, &obj->mcast_list); + else + kfree(mcast); + +out_put: + put_qp_write(qp); + + return ret ? 
ret : in_len; +} + +ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_detach_mcast cmd; + struct ib_uqp_object *obj; + struct ib_qp *qp; + struct ib_uverbs_mcast_entry *mcast; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + qp = idr_write_qp(cmd.qp_handle, file->ucontext); + if (!qp) + return -EINVAL; + + ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); + if (ret) + goto out_put; + + obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + + list_for_each_entry(mcast, &obj->mcast_list, list) + if (cmd.mlid == mcast->lid && + !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { + list_del(&mcast->list); + kfree(mcast); + break; + } + +out_put: + put_qp_write(qp); + + return ret ? ret : in_len; +} + +static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec) +{ + /* Returns user space filter size, includes padding */ + return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2; +} + +static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size, + u16 ib_real_filter_sz) +{ + /* + * User space filter structures must be 64 bit aligned, otherwise this + * may pass, but we won't handle additional new attributes. + */ + + if (kern_filter_size > ib_real_filter_sz) { + if (memchr_inv((char *)kern_spec_filter + + ib_real_filter_sz, 0, + kern_filter_size - ib_real_filter_sz)) + return -EINVAL; + return ib_real_filter_sz; + } + return kern_filter_size; +} + +static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, + union ib_flow_spec *ib_spec) +{ + ssize_t actual_filter_sz; + ssize_t kern_filter_sz; + ssize_t ib_filter_sz; + void *kern_spec_mask; + void *kern_spec_val; + + if (kern_spec->reserved) + return -EINVAL; + + ib_spec->type = kern_spec->type; + + kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); + /* User flow spec size must be aligned to 4 bytes */ + if (kern_filter_sz != ALIGN(kern_filter_sz, 4)) + return -EINVAL; + + kern_spec_val = (char *)kern_spec + + sizeof(struct ib_uverbs_flow_spec_hdr); + kern_spec_mask = (char *)kern_spec_val + kern_filter_sz; + + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->size = sizeof(struct ib_flow_spec_eth); + memcpy(&ib_spec->eth.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->eth.mask, kern_spec_mask, actual_filter_sz); + break; + case IB_FLOW_SPEC_IPV4: + ib_filter_sz = offsetof(struct ib_flow_ipv4_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->size = sizeof(struct ib_flow_spec_ipv4); + memcpy(&ib_spec->ipv4.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->ipv4.mask, kern_spec_mask, actual_filter_sz); + break; + case IB_FLOW_SPEC_IPV6: + ib_filter_sz = offsetof(struct ib_flow_ipv6_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->size = sizeof(struct ib_flow_spec_ipv6); + memcpy(&ib_spec->ipv6.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->ipv6.mask, kern_spec_mask, actual_filter_sz); + + if ((ntohl(ib_spec->ipv6.mask.flow_label)) >= BIT(20) || + 
(ntohl(ib_spec->ipv6.val.flow_label)) >= BIT(20)) + return -EINVAL; + break; + case IB_FLOW_SPEC_TCP: + case IB_FLOW_SPEC_UDP: + ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp); + memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz); + break; + default: + return -EINVAL; + } + return 0; +} + +int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_wq cmd = {}; + struct ib_uverbs_ex_create_wq_resp resp = {}; + struct ib_uwq_object *obj; + int err = 0; + struct ib_cq *cq; + struct ib_pd *pd; + struct ib_wq *wq; + struct ib_wq_init_attr wq_init_attr = {}; + size_t required_cmd_sz; + size_t required_resp_len; + + required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); + required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (err) + return err; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + obj = kmalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, + &wq_lock_class); + down_write(&obj->uevent.uobject.mutex); + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + err = -EINVAL; + goto err_uobj; + } + + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) { + err = -EINVAL; + goto err_put_pd; + } + + wq_init_attr.cq = cq; + wq_init_attr.max_sge = cmd.max_sge; + wq_init_attr.max_wr = cmd.max_wr; + wq_init_attr.wq_context = file; + wq_init_attr.wq_type = cmd.wq_type; + wq_init_attr.event_handler = ib_uverbs_wq_event_handler; + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + wq = pd->device->create_wq(pd, &wq_init_attr, uhw); + if (IS_ERR(wq)) { + err = PTR_ERR(wq); + goto err_put_cq; + } + + wq->uobject = &obj->uevent.uobject; + obj->uevent.uobject.object = wq; + wq->wq_type = wq_init_attr.wq_type; + wq->cq = cq; + wq->pd = pd; + wq->device = pd->device; + wq->wq_context = wq_init_attr.wq_context; + atomic_set(&wq->usecnt, 0); + atomic_inc(&pd->usecnt); + atomic_inc(&cq->usecnt); + err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); + if (err) + goto destroy_wq; + + memset(&resp, 0, sizeof(resp)); + resp.wq_handle = obj->uevent.uobject.id; + resp.max_sge = wq_init_attr.max_sge; + resp.max_wr = wq_init_attr.max_wr; + resp.wqn = wq->wq_num; + resp.response_length = required_resp_len; + err = ib_copy_to_udata(ucore, + &resp, resp.response_length); + if (err) + goto err_copy; + + put_pd_read(pd); + put_cq_read(cq); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list); + mutex_unlock(&file->mutex); + + obj->uevent.uobject.live = 1; + up_write(&obj->uevent.uobject.mutex); + return 0; + +err_copy: + idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); +destroy_wq: + ib_destroy_wq(wq); +err_put_cq: + 
put_cq_read(cq); +err_put_pd: + put_pd_read(pd); +err_uobj: + put_uobj_write(&obj->uevent.uobject); + + return err; +} + +int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_destroy_wq cmd = {}; + struct ib_uverbs_ex_destroy_wq_resp resp = {}; + struct ib_wq *wq; + struct ib_uobject *uobj; + struct ib_uwq_object *obj; + size_t required_cmd_sz; + size_t required_resp_len; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle); + required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + resp.response_length = required_resp_len; + uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + + wq = uobj->object; + obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); + ret = ib_destroy_wq(wq); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_wq_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + ib_uverbs_release_uevent(file, &obj->uevent); + resp.events_reported = obj->uevent.events_reported; + put_uobj(uobj); + + ret = ib_copy_to_udata(ucore, &resp, resp.response_length); + if (ret) + return ret; + + return 0; +} + +int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_modify_wq cmd = {}; + struct ib_wq *wq; + struct ib_wq_attr wq_attr = {}; + size_t required_cmd_sz; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state); + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (!cmd.attr_mask) + return -EINVAL; + + if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE)) + return -EINVAL; + + wq = idr_read_wq(cmd.wq_handle, file->ucontext); + if (!wq) + return -EINVAL; + + wq_attr.curr_wq_state = cmd.curr_wq_state; + wq_attr.wq_state = cmd.wq_state; + ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); + put_wq_read(wq); + return ret; +} + +int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_rwq_ind_table cmd = {}; + struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {}; + struct ib_uobject *uobj; + int err = 0; + struct ib_rwq_ind_table_init_attr init_attr = {}; + struct ib_rwq_ind_table *rwq_ind_tbl; + struct ib_wq **wqs = NULL; + u32 *wqs_handles = NULL; + struct ib_wq *wq = NULL; + int i, j, num_read_wqs; + u32 num_wq_handles; + u32 expected_in_size; + size_t required_cmd_sz_header; + size_t required_resp_len; + + required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); + required_resp_len = offsetof(typeof(resp), 
ind_tbl_num) + sizeof(resp.ind_tbl_num); + + if (ucore->inlen < required_cmd_sz_header) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header); + if (err) + return err; + + ucore->inbuf = (const char *)ucore->inbuf + required_cmd_sz_header; + ucore->inlen -= required_cmd_sz_header; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + if (cmd.log_ind_tbl_size > IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE) + return -EINVAL; + + num_wq_handles = 1 << cmd.log_ind_tbl_size; + expected_in_size = num_wq_handles * sizeof(__u32); + if (num_wq_handles == 1) + /* input size for wq handles is u64 aligned */ + expected_in_size += sizeof(__u32); + + if (ucore->inlen < expected_in_size) + return -EINVAL; + + if (ucore->inlen > expected_in_size && + !ib_is_udata_cleared(ucore, expected_in_size, + ucore->inlen - expected_in_size)) + return -EOPNOTSUPP; + + wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles), + GFP_KERNEL); + if (!wqs_handles) + return -ENOMEM; + + err = ib_copy_from_udata(wqs_handles, ucore, + num_wq_handles * sizeof(__u32)); + if (err) + goto err_free; + + wqs = kcalloc(num_wq_handles, sizeof(*wqs), GFP_KERNEL); + if (!wqs) { + err = -ENOMEM; + goto err_free; + } + + for (num_read_wqs = 0; num_read_wqs < num_wq_handles; + num_read_wqs++) { + wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext); + if (!wq) { + err = -EINVAL; + goto put_wqs; + } + + wqs[num_read_wqs] = wq; + } + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) { + err = -ENOMEM; + goto put_wqs; + } + + init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class); + down_write(&uobj->mutex); + init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; + init_attr.ind_tbl = wqs; + rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); + + if (IS_ERR(rwq_ind_tbl)) { + err = PTR_ERR(rwq_ind_tbl); + goto err_uobj; + } + + rwq_ind_tbl->ind_tbl = wqs; + rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size; + rwq_ind_tbl->uobject = uobj; + uobj->object = rwq_ind_tbl; + rwq_ind_tbl->device = ib_dev; + atomic_set(&rwq_ind_tbl->usecnt, 0); + + for (i = 0; i < num_wq_handles; i++) + atomic_inc(&wqs[i]->usecnt); + + err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + if (err) + goto destroy_ind_tbl; + + resp.ind_tbl_handle = uobj->id; + resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; + resp.response_length = required_resp_len; + + err = ib_copy_to_udata(ucore, + &resp, resp.response_length); + if (err) + goto err_copy; + + kfree(wqs_handles); + + for (j = 0; j < num_read_wqs; j++) + put_wq_read(wqs[j]); + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + return 0; + +err_copy: + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); +destroy_ind_tbl: + ib_destroy_rwq_ind_table(rwq_ind_tbl); +err_uobj: + put_uobj_write(uobj); +put_wqs: + for (j = 0; j < num_read_wqs; j++) + put_wq_read(wqs[j]); +err_free: + kfree(wqs_handles); + kfree(wqs); + return err; +} + +int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; + struct ib_rwq_ind_table *rwq_ind_tbl; + struct ib_uobject *uobj; + int ret; + struct ib_wq **ind_tbl; + size_t required_cmd_sz; + + required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle); + + if (ucore->inlen < 
required_cmd_sz) + return -EINVAL; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + rwq_ind_tbl = uobj->object; + ind_tbl = rwq_ind_tbl->ind_tbl; + + ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + kfree(ind_tbl); + return ret; +} + +int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_create_flow cmd; + struct ib_uverbs_create_flow_resp resp; + struct ib_uobject *uobj; + struct ib_flow *flow_id; + struct ib_uverbs_flow_attr *kern_flow_attr; + struct ib_flow_attr *flow_attr; + struct ib_qp *qp; + int err = 0; + void *kern_spec; + void *ib_spec; + int i; + + if (ucore->inlen < sizeof(cmd)) + return -EINVAL; + + if (ucore->outlen < sizeof(resp)) + return -ENOSPC; + + err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + if (err) + return err; + + ucore->inbuf = (const char *)ucore->inbuf + sizeof(cmd); + ucore->inlen -= sizeof(cmd); + + if (cmd.comp_mask) + return -EINVAL; + + if (priv_check(curthread, PRIV_NET_RAW) != 0) + return -EPERM; + + if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED) + return -EINVAL; + + if ((cmd.flow_attr.flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && + ((cmd.flow_attr.type == IB_FLOW_ATTR_ALL_DEFAULT) || + (cmd.flow_attr.type == IB_FLOW_ATTR_MC_DEFAULT))) + return -EINVAL; + + if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) + return -EINVAL; + + if (cmd.flow_attr.size > ucore->inlen || + cmd.flow_attr.size > + (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec))) + return -EINVAL; + + if (cmd.flow_attr.reserved[0] || + cmd.flow_attr.reserved[1]) + return -EINVAL; + + if (cmd.flow_attr.num_of_specs) { + kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size, + GFP_KERNEL); + if (!kern_flow_attr) + return -ENOMEM; + + memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); + err = ib_copy_from_udata(kern_flow_attr + 1, ucore, + cmd.flow_attr.size); + if (err) + goto err_free_attr; + } else { + kern_flow_attr = &cmd.flow_attr; + } + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) { + err = -ENOMEM; + goto err_free_attr; + } + init_uobj(uobj, 0, file->ucontext, &rule_lock_class); + down_write(&uobj->mutex); + + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) { + err = -EINVAL; + goto err_uobj; + } + + flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs * + sizeof(union ib_flow_spec), GFP_KERNEL); + if (!flow_attr) { + err = -ENOMEM; + goto err_put; + } + + flow_attr->type = kern_flow_attr->type; + flow_attr->priority = kern_flow_attr->priority; + flow_attr->num_of_specs = kern_flow_attr->num_of_specs; + flow_attr->port = kern_flow_attr->port; + flow_attr->flags = kern_flow_attr->flags; + flow_attr->size = sizeof(*flow_attr); + + kern_spec = kern_flow_attr + 1; + ib_spec = flow_attr + 1; + for (i = 0; i < flow_attr->num_of_specs && + cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, 
reserved) && + cmd.flow_attr.size >= + ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) { + err = kern_spec_to_ib_spec(kern_spec, ib_spec); + if (err) + goto err_free; + flow_attr->size += + ((union ib_flow_spec *) ib_spec)->size; + cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size; + kern_spec = (char *)kern_spec + ((struct ib_uverbs_flow_spec *) kern_spec)->size; + ib_spec = (char *)ib_spec + ((union ib_flow_spec *)ib_spec)->size; + } + if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) { + pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n", + i, cmd.flow_attr.size); + err = -EINVAL; + goto err_free; + } + flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); + if (IS_ERR(flow_id)) { + err = PTR_ERR(flow_id); + goto err_free; + } + flow_id->qp = qp; + flow_id->uobject = uobj; + uobj->object = flow_id; + + err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); + if (err) + goto destroy_flow; + + memset(&resp, 0, sizeof(resp)); + resp.flow_handle = uobj->id; + + err = ib_copy_to_udata(ucore, + &resp, sizeof(resp)); + if (err) + goto err_copy; + + put_qp_read(qp); + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->rule_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + kfree(flow_attr); + if (cmd.flow_attr.num_of_specs) + kfree(kern_flow_attr); + return 0; +err_copy: + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); +destroy_flow: + ib_destroy_flow(flow_id); +err_free: + kfree(flow_attr); +err_put: + put_qp_read(qp); +err_uobj: + put_uobj_write(uobj); +err_free_attr: + if (cmd.flow_attr.num_of_specs) + kfree(kern_flow_attr); + return err; +} + +int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_destroy_flow cmd; + struct ib_flow *flow_id; + struct ib_uobject *uobj; + int ret; + + if (ucore->inlen < sizeof(cmd)) + return -EINVAL; + + ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + if (ret) + return ret; + + if (cmd.comp_mask) + return -EINVAL; + + uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + flow_id = uobj->object; + + ret = ib_destroy_flow(flow_id); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + return ret; +} + +static int __uverbs_create_xsrq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_uverbs_create_xsrq *cmd, + struct ib_udata *udata) +{ + struct ib_uverbs_create_srq_resp resp; + struct ib_usrq_object *obj; + struct ib_pd *pd; + struct ib_srq *srq; + struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_srq_init_attr attr; + int ret; + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); + down_write(&obj->uevent.uobject.mutex); + + if (cmd->srq_type == IB_SRQT_XRC) { + attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); + if (!attr.ext.xrc.xrcd) { + ret = -EINVAL; + goto err; + } + + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); + atomic_inc(&obj->uxrcd->refcnt); + + attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); + if (!attr.ext.xrc.cq) { + ret = -EINVAL; + goto err_put_xrcd; + } + } + + pd = idr_read_pd(cmd->pd_handle, 
file->ucontext); + if (!pd) { + ret = -EINVAL; + goto err_put_cq; + } + + attr.event_handler = ib_uverbs_srq_event_handler; + attr.srq_context = file; + attr.srq_type = cmd->srq_type; + attr.attr.max_wr = cmd->max_wr; + attr.attr.max_sge = cmd->max_sge; + attr.attr.srq_limit = cmd->srq_limit; + + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + + srq = pd->device->create_srq(pd, &attr, udata); + if (IS_ERR(srq)) { + ret = PTR_ERR(srq); + goto err_put; + } + + srq->device = pd->device; + srq->pd = pd; + srq->srq_type = cmd->srq_type; + srq->uobject = &obj->uevent.uobject; + srq->event_handler = attr.event_handler; + srq->srq_context = attr.srq_context; + + if (cmd->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.cq = attr.ext.xrc.cq; + srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; + atomic_inc(&attr.ext.xrc.cq->usecnt); + atomic_inc(&attr.ext.xrc.xrcd->usecnt); + } + + atomic_inc(&pd->usecnt); + atomic_set(&srq->usecnt, 0); + + obj->uevent.uobject.object = srq; + ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); + if (ret) + goto err_destroy; + + memset(&resp, 0, sizeof resp); + resp.srq_handle = obj->uevent.uobject.id; + resp.max_wr = attr.attr.max_wr; + resp.max_sge = attr.attr.max_sge; + if (cmd->srq_type == IB_SRQT_XRC) + resp.srqn = srq->ext.xrc.srq_num; + + if (copy_to_user((void __user *) (unsigned long) cmd->response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_copy; + } + + if (cmd->srq_type == IB_SRQT_XRC) { + put_uobj_read(xrcd_uobj); + put_cq_read(attr.ext.xrc.cq); + } + put_pd_read(pd); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list); + mutex_unlock(&file->mutex); + + obj->uevent.uobject.live = 1; + + up_write(&obj->uevent.uobject.mutex); + + return 0; + +err_copy: + idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); + +err_destroy: + ib_destroy_srq(srq); + +err_put: + put_pd_read(pd); + +err_put_cq: + if (cmd->srq_type == IB_SRQT_XRC) + put_cq_read(attr.ext.xrc.cq); + +err_put_xrcd: + if (cmd->srq_type == IB_SRQT_XRC) { + atomic_dec(&obj->uxrcd->refcnt); + put_uobj_read(xrcd_uobj); + } + +err: + put_uobj_write(&obj->uevent.uobject); + return ret; +} + +ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_srq cmd; + struct ib_uverbs_create_xsrq xcmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + xcmd.response = cmd.response; + xcmd.user_handle = cmd.user_handle; + xcmd.srq_type = IB_SRQT_BASIC; + xcmd.pd_handle = cmd.pd_handle; + xcmd.max_wr = cmd.max_wr; + xcmd.max_sge = cmd.max_sge; + xcmd.srq_limit = cmd.srq_limit; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof resp); + + ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); + if (ret) + return ret; + + return in_len; +} + +ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_create_xsrq cmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) 
cmd.response + sizeof resp, + in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof resp); + + ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); + if (ret) + return ret; + + return in_len; +} + +ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_modify_srq cmd; + struct ib_udata udata; + struct ib_srq *srq; + struct ib_srq_attr attr; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, + out_len); + + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq) + return -EINVAL; + + attr.max_wr = cmd.max_wr; + attr.srq_limit = cmd.srq_limit; + + ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); + + put_srq_read(srq); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_srq cmd; + struct ib_uverbs_query_srq_resp resp; + struct ib_srq_attr attr; + struct ib_srq *srq; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq) + return -EINVAL; + + ret = ib_query_srq(srq, &attr); + + put_srq_read(srq); + + if (ret) + return ret; + + memset(&resp, 0, sizeof resp); + + resp.max_wr = attr.max_wr; + resp.max_sge = attr.max_sge; + resp.srq_limit = attr.srq_limit; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_srq cmd; + struct ib_uverbs_destroy_srq_resp resp; + struct ib_uobject *uobj; + struct ib_srq *srq; + struct ib_uevent_object *obj; + int ret = -EINVAL; + struct ib_usrq_object *us; + enum ib_srq_type srq_type; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext); + if (!uobj) + return -EINVAL; + srq = uobj->object; + obj = container_of(uobj, struct ib_uevent_object, uobject); + srq_type = srq->srq_type; + + ret = ib_destroy_srq(srq); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + if (srq_type == IB_SRQT_XRC) { + us = container_of(obj, struct ib_usrq_object, uevent); + atomic_dec(&us->uxrcd->refcnt); + } + + idr_remove_uobj(&ib_uverbs_srq_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + ib_uverbs_release_uevent(file, obj); + + memset(&resp, 0, sizeof resp); + resp.events_reported = obj->events_reported; + + put_uobj(uobj); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + + return ret ? 
ret : in_len; +} + +int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_query_device_resp resp = { {0} }; + struct ib_uverbs_ex_query_device cmd; + struct ib_device_attr attr = {0}; + int err; + + if (ucore->inlen < sizeof(cmd)) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + if (err) + return err; + + if (cmd.comp_mask) + return -EINVAL; + + if (cmd.reserved) + return -EINVAL; + + resp.response_length = offsetof(typeof(resp), odp_caps); + + if (ucore->outlen < resp.response_length) + return -ENOSPC; + + err = ib_dev->query_device(ib_dev, &attr, uhw); + if (err) + return err; + + copy_query_dev_fields(file, ib_dev, &resp.base, &attr); + + if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) + goto end; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + resp.odp_caps.general_caps = attr.odp_caps.general_caps; + resp.odp_caps.per_transport_caps.rc_odp_caps = + attr.odp_caps.per_transport_caps.rc_odp_caps; + resp.odp_caps.per_transport_caps.uc_odp_caps = + attr.odp_caps.per_transport_caps.uc_odp_caps; + resp.odp_caps.per_transport_caps.ud_odp_caps = + attr.odp_caps.per_transport_caps.ud_odp_caps; +#endif + resp.response_length += sizeof(resp.odp_caps); + + if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask)) + goto end; + + resp.timestamp_mask = attr.timestamp_mask; + resp.response_length += sizeof(resp.timestamp_mask); + + if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock)) + goto end; + + resp.hca_core_clock = attr.hca_core_clock; + resp.response_length += sizeof(resp.hca_core_clock); + + if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex)) + goto end; + + resp.device_cap_flags_ex = attr.device_cap_flags; + resp.response_length += sizeof(resp.device_cap_flags_ex); + + if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps)) + goto end; + + resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts; + resp.rss_caps.max_rwq_indirection_tables = + attr.rss_caps.max_rwq_indirection_tables; + resp.rss_caps.max_rwq_indirection_table_size = + attr.rss_caps.max_rwq_indirection_table_size; + + resp.response_length += sizeof(resp.rss_caps); + + if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq)) + goto end; + + resp.max_wq_type_rq = attr.max_wq_type_rq; + resp.response_length += sizeof(resp.max_wq_type_rq); +end: + err = ib_copy_to_udata(ucore, &resp, resp.response_length); + return err; +} Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c (revision 320592) @@ -0,0 +1,1434 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. 
All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "uverbs.h" + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("InfiniBand userspace verbs access"); +MODULE_LICENSE("Dual BSD/GPL"); + +enum { + IB_UVERBS_MAJOR = 231, + IB_UVERBS_BASE_MINOR = 192, + IB_UVERBS_MAX_DEVICES = 32 +}; + +#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) + +static struct class *uverbs_class; + +DEFINE_SPINLOCK(ib_uverbs_idr_lock); +DEFINE_IDR(ib_uverbs_pd_idr); +DEFINE_IDR(ib_uverbs_mr_idr); +DEFINE_IDR(ib_uverbs_mw_idr); +DEFINE_IDR(ib_uverbs_ah_idr); +DEFINE_IDR(ib_uverbs_cq_idr); +DEFINE_IDR(ib_uverbs_qp_idr); +DEFINE_IDR(ib_uverbs_srq_idr); +DEFINE_IDR(ib_uverbs_xrcd_idr); +DEFINE_IDR(ib_uverbs_rule_idr); +DEFINE_IDR(ib_uverbs_wq_idr); +DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr); + +static DEFINE_SPINLOCK(map_lock); +static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); + +static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) = { + [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, + [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, + [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, + [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, + [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, + [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr, + [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, + [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, + [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, + [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, + [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, + [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, + [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, + [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_QUERY_QP] = 
ib_uverbs_query_qp, + [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, + [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, + [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, + [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, + [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, + [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, + [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, + [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, + [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, + [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, + [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, + [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, + [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, + [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, +}; + +static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) = { + [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, + [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, + [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, + [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, + [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, + [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, + [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, + [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, + [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, + [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, +}; + +static void ib_uverbs_add_one(struct ib_device *device); +static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); + +int uverbs_dealloc_mw(struct ib_mw *mw) +{ + struct ib_pd *pd = mw->pd; + int ret; + + ret = mw->device->dealloc_mw(mw); + if (!ret) + atomic_dec(&pd->usecnt); + return ret; +} + +static void ib_uverbs_release_dev(struct kobject *kobj) +{ + struct ib_uverbs_device *dev = + container_of(kobj, struct ib_uverbs_device, kobj); + + cleanup_srcu_struct(&dev->disassociate_srcu); + kfree(dev); +} + +static struct kobj_type ib_uverbs_dev_ktype = { + .release = ib_uverbs_release_dev, +}; + +static void ib_uverbs_release_event_file(struct kref *ref) +{ + struct ib_uverbs_event_file *file = + container_of(ref, struct ib_uverbs_event_file, ref); + + kfree(file); +} + +void ib_uverbs_release_ucq(struct ib_uverbs_file *file, + struct ib_uverbs_event_file *ev_file, + struct ib_ucq_object *uobj) +{ + struct ib_uverbs_event *evt, *tmp; + + if (ev_file) { + spin_lock_irq(&ev_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&ev_file->lock); + + kref_put(&ev_file->ref, ib_uverbs_release_event_file); + } + + spin_lock_irq(&file->async_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file->lock); +} + +void ib_uverbs_release_uevent(struct ib_uverbs_file *file, + struct ib_uevent_object *uobj) +{ + struct ib_uverbs_event *evt, *tmp; + + spin_lock_irq(&file->async_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + 
spin_unlock_irq(&file->async_file->lock); +} + +static void ib_uverbs_detach_umcast(struct ib_qp *qp, + struct ib_uqp_object *uobj) +{ + struct ib_uverbs_mcast_entry *mcast, *tmp; + + list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) { + ib_detach_mcast(qp, &mcast->gid, mcast->lid); + list_del(&mcast->list); + kfree(mcast); + } +} + +static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, + struct ib_ucontext *context) +{ + struct ib_uobject *uobj, *tmp; + + context->closing = 1; + + list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { + struct ib_ah *ah = uobj->object; + + idr_remove_uobj(&ib_uverbs_ah_idr, uobj); + ib_destroy_ah(ah); + kfree(uobj); + } + + /* Remove MWs before QPs, in order to support type 2A MWs. */ + list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { + struct ib_mw *mw = uobj->object; + + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + uverbs_dealloc_mw(mw); + kfree(uobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { + struct ib_flow *flow_id = uobj->object; + + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + ib_destroy_flow(flow_id); + kfree(uobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { + struct ib_qp *qp = uobj->object; + struct ib_uqp_object *uqp = + container_of(uobj, struct ib_uqp_object, uevent.uobject); + + idr_remove_uobj(&ib_uverbs_qp_idr, uobj); + if (qp == qp->real_qp) + ib_uverbs_detach_umcast(qp, uqp); + ib_destroy_qp(qp); + ib_uverbs_release_uevent(file, &uqp->uevent); + kfree(uqp); + } + + list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) { + struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; + struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; + + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + ib_destroy_rwq_ind_table(rwq_ind_tbl); + kfree(ind_tbl); + kfree(uobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { + struct ib_wq *wq = uobj->object; + struct ib_uwq_object *uwq = + container_of(uobj, struct ib_uwq_object, uevent.uobject); + + idr_remove_uobj(&ib_uverbs_wq_idr, uobj); + ib_destroy_wq(wq); + ib_uverbs_release_uevent(file, &uwq->uevent); + kfree(uwq); + } + + list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { + struct ib_srq *srq = uobj->object; + struct ib_uevent_object *uevent = + container_of(uobj, struct ib_uevent_object, uobject); + + idr_remove_uobj(&ib_uverbs_srq_idr, uobj); + ib_destroy_srq(srq); + ib_uverbs_release_uevent(file, uevent); + kfree(uevent); + } + + list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { + struct ib_cq *cq = uobj->object; + struct ib_uverbs_event_file *ev_file = cq->cq_context; + struct ib_ucq_object *ucq = + container_of(uobj, struct ib_ucq_object, uobject); + + idr_remove_uobj(&ib_uverbs_cq_idr, uobj); + ib_destroy_cq(cq); + ib_uverbs_release_ucq(file, ev_file, ucq); + kfree(ucq); + } + + list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { + struct ib_mr *mr = uobj->object; + + idr_remove_uobj(&ib_uverbs_mr_idr, uobj); + ib_dereg_mr(mr); + kfree(uobj); + } + + mutex_lock(&file->device->xrcd_tree_mutex); + list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { + struct ib_xrcd *xrcd = uobj->object; + struct ib_uxrcd_object *uxrcd = + container_of(uobj, struct ib_uxrcd_object, uobject); + + idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); + ib_uverbs_dealloc_xrcd(file->device, xrcd); + kfree(uxrcd); + } + mutex_unlock(&file->device->xrcd_tree_mutex); + + list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { + struct 
ib_pd *pd = uobj->object; + + idr_remove_uobj(&ib_uverbs_pd_idr, uobj); + ib_dealloc_pd(pd); + kfree(uobj); + } + + put_pid(context->tgid); + + return context->device->dealloc_ucontext(context); +} + +static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) +{ + complete(&dev->comp); +} + +static void ib_uverbs_release_file(struct kref *ref) +{ + struct ib_uverbs_file *file = + container_of(ref, struct ib_uverbs_file, ref); + struct ib_device *ib_dev; + int srcu_key; + + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); + ib_dev = srcu_dereference(file->device->ib_dev, + &file->device->disassociate_srcu); + if (ib_dev && !ib_dev->disassociate_ucontext) + module_put(ib_dev->owner); + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); + + if (atomic_dec_and_test(&file->device->refcount)) + ib_uverbs_comp_dev(file->device); + + kfree(file); +} + +static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_event_file *file = filp->private_data; + struct ib_uverbs_event *event; + int eventsz; + int ret = 0; + + spin_lock_irq(&file->lock); + + while (list_empty(&file->event_list)) { + spin_unlock_irq(&file->lock); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(file->poll_wait, + (!list_empty(&file->event_list) || + /* The barriers built into wait_event_interruptible() + * and wake_up() guarantee this will see the null set + * without using RCU + */ + !file->uverbs_file->device->ib_dev))) + return -ERESTARTSYS; + + /* If device was disassociated and no event exists, set an error */ + if (list_empty(&file->event_list) && + !file->uverbs_file->device->ib_dev) + return -EIO; + + spin_lock_irq(&file->lock); + } + + event = list_entry(file->event_list.next, struct ib_uverbs_event, list); + + if (file->is_async) + eventsz = sizeof (struct ib_uverbs_async_event_desc); + else + eventsz = sizeof (struct ib_uverbs_comp_event_desc); + + if (eventsz > count) { + ret = -EINVAL; + event = NULL; + } else { + list_del(file->event_list.next); + if (event->counter) { + ++(*event->counter); + list_del(&event->obj_list); + } + } + + spin_unlock_irq(&file->lock); + + if (event) { + if (copy_to_user(buf, event, eventsz)) + ret = -EFAULT; + else + ret = eventsz; + } + + kfree(event); + + return ret; +} + +static unsigned int ib_uverbs_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + unsigned int pollflags = 0; + struct ib_uverbs_event_file *file = filp->private_data; + + poll_wait(filp, &file->poll_wait, wait); + + spin_lock_irq(&file->lock); + if (!list_empty(&file->event_list)) + pollflags = POLLIN | POLLRDNORM; + spin_unlock_irq(&file->lock); + + return pollflags; +} + +static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) +{ + struct ib_uverbs_event_file *file = filp->private_data; + + return fasync_helper(fd, filp, on, &file->async_queue); +} + +static int ib_uverbs_event_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_event_file *file = filp->private_data; + struct ib_uverbs_event *entry, *tmp; + int closed_already = 0; + + mutex_lock(&file->uverbs_file->device->lists_mutex); + spin_lock_irq(&file->lock); + closed_already = file->is_closed; + file->is_closed = 1; + list_for_each_entry_safe(entry, tmp, &file->event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + kfree(entry); + } + spin_unlock_irq(&file->lock); + if (!closed_already) { + list_del(&file->list); + if (file->is_async) +
ib_unregister_event_handler(&file->uverbs_file-> + event_handler); + } + mutex_unlock(&file->uverbs_file->device->lists_mutex); + + kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + kref_put(&file->ref, ib_uverbs_release_event_file); + + return 0; +} + +static const struct file_operations uverbs_event_fops = { + .owner = THIS_MODULE, + .read = ib_uverbs_event_read, + .poll = ib_uverbs_event_poll, + .release = ib_uverbs_event_close, + .fasync = ib_uverbs_event_fasync, + .llseek = no_llseek, +}; + +void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) +{ + struct ib_uverbs_event_file *file = cq_context; + struct ib_ucq_object *uobj; + struct ib_uverbs_event *entry; + unsigned long flags; + + if (!file) + return; + + spin_lock_irqsave(&file->lock, flags); + if (file->is_closed) { + spin_unlock_irqrestore(&file->lock, flags); + return; + } + + entry = kmalloc(sizeof *entry, GFP_ATOMIC); + if (!entry) { + spin_unlock_irqrestore(&file->lock, flags); + return; + } + + uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + + entry->desc.comp.cq_handle = cq->uobject->user_handle; + entry->counter = &uobj->comp_events_reported; + + list_add_tail(&entry->list, &file->event_list); + list_add_tail(&entry->obj_list, &uobj->comp_list); + spin_unlock_irqrestore(&file->lock, flags); + + wake_up_interruptible(&file->poll_wait); + linux_poll_wakeup(file->filp); + kill_fasync(&file->async_queue, SIGIO, POLL_IN); +} + +static void ib_uverbs_async_handler(struct ib_uverbs_file *file, + __u64 element, __u64 event, + struct list_head *obj_list, + u32 *counter) +{ + struct ib_uverbs_event *entry; + unsigned long flags; + + spin_lock_irqsave(&file->async_file->lock, flags); + if (file->async_file->is_closed) { + spin_unlock_irqrestore(&file->async_file->lock, flags); + return; + } + + entry = kmalloc(sizeof *entry, GFP_ATOMIC); + if (!entry) { + spin_unlock_irqrestore(&file->async_file->lock, flags); + return; + } + + entry->desc.async.element = element; + entry->desc.async.event_type = event; + entry->desc.async.reserved = 0; + entry->counter = counter; + + list_add_tail(&entry->list, &file->async_file->event_list); + if (obj_list) + list_add_tail(&entry->obj_list, obj_list); + spin_unlock_irqrestore(&file->async_file->lock, flags); + + wake_up_interruptible(&file->async_file->poll_wait); + linux_poll_wakeup(file->async_file->filp); + kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); +} + +void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) +{ + struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, + struct ib_ucq_object, uobject); + + ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, + event->event, &uobj->async_list, + &uobj->async_events_reported); +} + +void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) +{ + struct ib_uevent_object *uobj; + + /* for XRC target qp's, check that qp is live */ + if (!event->element.qp->uobject || !event->element.qp->uobject->live) + return; + + uobj = container_of(event->element.qp->uobject, + struct ib_uevent_object, uobject); + + ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + event->event, &uobj->event_list, + &uobj->events_reported); +} + +void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) +{ + struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, + struct ib_uevent_object, uobject); + + ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + event->event, &uobj->event_list, + 
&uobj->events_reported); +} + +void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) +{ + struct ib_uevent_object *uobj; + + uobj = container_of(event->element.srq->uobject, + struct ib_uevent_object, uobject); + + ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + event->event, &uobj->event_list, + &uobj->events_reported); +} + +void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct ib_uverbs_file *file = + container_of(handler, struct ib_uverbs_file, event_handler); + + ib_uverbs_async_handler(file, event->element.port_num, event->event, + NULL, NULL); +} + +void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) +{ + kref_put(&file->async_file->ref, ib_uverbs_release_event_file); + file->async_file = NULL; +} + +struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + struct ib_device *ib_dev, + int is_async) +{ + struct ib_uverbs_event_file *ev_file; + struct file *filp; + int ret; + + ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL); + if (!ev_file) + return ERR_PTR(-ENOMEM); + + kref_init(&ev_file->ref); + spin_lock_init(&ev_file->lock); + INIT_LIST_HEAD(&ev_file->event_list); + init_waitqueue_head(&ev_file->poll_wait); + ev_file->uverbs_file = uverbs_file; + kref_get(&ev_file->uverbs_file->ref); + ev_file->async_queue = NULL; + ev_file->is_closed = 0; + + /* + * fops_get() can't fail here, because we're coming from a + * system call on a uverbs file, which will already have a + * module reference. + */ + filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops)); + if (IS_ERR(filp)) + goto err_put_refs; + filp->private_data = ev_file; + ev_file->filp = filp; + + mutex_lock(&uverbs_file->device->lists_mutex); + list_add_tail(&ev_file->list, + &uverbs_file->device->uverbs_events_file_list); + mutex_unlock(&uverbs_file->device->lists_mutex); + + if (is_async) { + WARN_ON(uverbs_file->async_file); + uverbs_file->async_file = ev_file; + kref_get(&uverbs_file->async_file->ref); + INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, + ib_dev, + ib_uverbs_event_handler); + ret = ib_register_event_handler(&uverbs_file->event_handler); + if (ret) + goto err_put_file; + + /* At that point async file stuff was fully set */ + ev_file->is_async = 1; + } + + return filp; + +err_put_file: + fput(filp); + kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file); + uverbs_file->async_file = NULL; + return ERR_PTR(ret); + +err_put_refs: + kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); + kref_put(&ev_file->ref, ib_uverbs_release_event_file); + return filp; +} + +/* + * Look up a completion event file by FD. If lookup is successful, + * takes a ref to the event file struct that it returns; if + * unsuccessful, returns NULL. 
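 *
 * The safety of this helper rests on comparing f_op against
 * &uverbs_event_fops before trusting private_data, and on taking the
 * kref while the reference from fdget() is still held. A minimal
 * sketch of the same validate-then-reference pattern (hypothetical
 * my_fops/my_obj names, shown only for illustration):
 *
 *	static struct my_obj *my_lookup(int fd)
 *	{
 *		struct fd f = fdget(fd);
 *		struct my_obj *obj = NULL;
 *
 *		if (!f.file)
 *			return NULL;
 *		if (f.file->f_op == &my_fops) {	// identity check first
 *			obj = f.file->private_data;
 *			kref_get(&obj->ref);	// pin before fdput()
 *		}
 *		fdput(f);
 *		return obj;
 *	}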
+ */ +struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) +{ + struct ib_uverbs_event_file *ev_file = NULL; + struct fd f = fdget(fd); + + if (!f.file) + return NULL; + + if (f.file->f_op != &uverbs_event_fops) + goto out; + + ev_file = f.file->private_data; + if (ev_file->is_async) { + ev_file = NULL; + goto out; + } + + kref_get(&ev_file->ref); + +out: + fdput(f); + return ev_file; +} + +static int verify_command_mask(struct ib_device *ib_dev, __u32 command) +{ + u64 mask; + + if (command <= IB_USER_VERBS_CMD_OPEN_QP) + mask = ib_dev->uverbs_cmd_mask; + else + mask = ib_dev->uverbs_ex_cmd_mask; + + if (mask & ((u64)1 << command)) + return 0; + + return -1; +} + +static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_file *file = filp->private_data; + struct ib_device *ib_dev; + struct ib_uverbs_cmd_hdr hdr; + __u32 command; + __u32 flags; + int srcu_key; + ssize_t ret; + + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + + if (count < sizeof hdr) + return -EINVAL; + + if (copy_from_user(&hdr, buf, sizeof hdr)) + return -EFAULT; + + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); + ib_dev = srcu_dereference(file->device->ib_dev, + &file->device->disassociate_srcu); + if (!ib_dev) { + ret = -EIO; + goto out; + } + + if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | + IB_USER_VERBS_CMD_COMMAND_MASK)) { + ret = -EINVAL; + goto out; + } + + command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; + if (verify_command_mask(ib_dev, command)) { + ret = -EOPNOTSUPP; + goto out; + } + + if (!file->ucontext && + command != IB_USER_VERBS_CMD_GET_CONTEXT) { + ret = -EINVAL; + goto out; + } + + flags = (hdr.command & + IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; + + if (!flags) { + if (command >= ARRAY_SIZE(uverbs_cmd_table) || + !uverbs_cmd_table[command]) { + ret = -EINVAL; + goto out; + } + + if (hdr.in_words * 4 != count) { + ret = -EINVAL; + goto out; + } + + ret = uverbs_cmd_table[command](file, ib_dev, + buf + sizeof(hdr), + hdr.in_words * 4, + hdr.out_words * 4); + + } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { + struct ib_uverbs_ex_cmd_hdr ex_hdr; + struct ib_udata ucore; + struct ib_udata uhw; + size_t written_count = count; + + if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || + !uverbs_ex_cmd_table[command]) { + ret = -ENOSYS; + goto out; + } + + if (!file->ucontext) { + ret = -EINVAL; + goto out; + } + + if (count < (sizeof(hdr) + sizeof(ex_hdr))) { + ret = -EINVAL; + goto out; + } + + if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) { + ret = -EFAULT; + goto out; + } + + count -= sizeof(hdr) + sizeof(ex_hdr); + buf += sizeof(hdr) + sizeof(ex_hdr); + + if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) { + ret = -EINVAL; + goto out; + } + + if (ex_hdr.cmd_hdr_reserved) { + ret = -EINVAL; + goto out; + } + + if (ex_hdr.response) { + if (!hdr.out_words && !ex_hdr.provider_out_words) { + ret = -EINVAL; + goto out; + } + + if (!access_ok(VERIFY_WRITE, + (void __user *) (unsigned long) ex_hdr.response, + (hdr.out_words + ex_hdr.provider_out_words) * 8)) { + ret = -EFAULT; + goto out; + } + } else { + if (hdr.out_words || ex_hdr.provider_out_words) { + ret = -EINVAL; + goto out; + } + } + + INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response, + hdr.in_words * 8, hdr.out_words * 8); + + INIT_UDATA_BUF_OR_NULL(&uhw, + buf + ucore.inlen, + (unsigned long) ex_hdr.response + ucore.outlen, + 
ex_hdr.provider_in_words * 8, + ex_hdr.provider_out_words * 8); + + ret = uverbs_ex_cmd_table[command](file, + ib_dev, + &ucore, + &uhw); + if (!ret) + ret = written_count; + } else { + ret = -ENOSYS; + } + +out: + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); + return ret; +} + +static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct ib_uverbs_file *file = filp->private_data; + struct ib_device *ib_dev; + int ret = 0; + int srcu_key; + + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); + ib_dev = srcu_dereference(file->device->ib_dev, + &file->device->disassociate_srcu); + if (!ib_dev) { + ret = -EIO; + goto out; + } + + if (!file->ucontext) + ret = -ENODEV; + else + ret = ib_dev->mmap(file->ucontext, vma); +out: + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); + return ret; +} + +/* + * ib_uverbs_open() does not need the BKL: + * + * - the ib_uverbs_device structures are properly reference counted and + * everything else is purely local to the file being created, so + * races against other open calls are not a problem; + * - there is no ioctl method to race against; + * - the open method will either immediately run -ENXIO, or all + * required initialization will be done. + */ +static int ib_uverbs_open(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_device *dev; + struct ib_uverbs_file *file; + struct ib_device *ib_dev; + int ret; + int module_dependent; + int srcu_key; + + dev = container_of(inode->i_cdev->si_drv1, struct ib_uverbs_device, cdev); + if (!atomic_inc_not_zero(&dev->refcount)) + return -ENXIO; + + srcu_key = srcu_read_lock(&dev->disassociate_srcu); + mutex_lock(&dev->lists_mutex); + ib_dev = srcu_dereference(dev->ib_dev, + &dev->disassociate_srcu); + if (!ib_dev) { + ret = -EIO; + goto err; + } + + /* In case IB device supports disassociate ucontext, there is no hard + * dependency between uverbs device and its low level device. 
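 *
 * Put differently, the low-level driver module is pinned with
 * try_module_get() only when it cannot later be disassociated from live
 * ucontexts; the code below is equivalent to this condensed sketch
 * (illustration only, not a behavioural change):
 *
 *	if (!ib_dev->disassociate_ucontext &&
 *	    !try_module_get(ib_dev->owner))
 *		return -ENODEV;	// driver is unloading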
+ */ + module_dependent = !(ib_dev->disassociate_ucontext); + + if (module_dependent) { + if (!try_module_get(ib_dev->owner)) { + ret = -ENODEV; + goto err; + } + } + + file = kzalloc(sizeof(*file), GFP_KERNEL); + if (!file) { + ret = -ENOMEM; + if (module_dependent) + goto err_module; + + goto err; + } + + file->device = dev; + file->ucontext = NULL; + file->async_file = NULL; + kref_init(&file->ref); + mutex_init(&file->mutex); + mutex_init(&file->cleanup_mutex); + + filp->private_data = file; + kobject_get(&dev->kobj); + list_add_tail(&file->list, &dev->uverbs_file_list); + mutex_unlock(&dev->lists_mutex); + srcu_read_unlock(&dev->disassociate_srcu, srcu_key); + + return nonseekable_open(inode, filp); + +err_module: + module_put(ib_dev->owner); + +err: + mutex_unlock(&dev->lists_mutex); + srcu_read_unlock(&dev->disassociate_srcu, srcu_key); + if (atomic_dec_and_test(&dev->refcount)) + ib_uverbs_comp_dev(dev); + + return ret; +} + +static int ib_uverbs_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_device *dev = file->device; + + mutex_lock(&file->cleanup_mutex); + if (file->ucontext) { + ib_uverbs_cleanup_ucontext(file, file->ucontext); + file->ucontext = NULL; + } + mutex_unlock(&file->cleanup_mutex); + + mutex_lock(&file->device->lists_mutex); + if (!file->is_closed) { + list_del(&file->list); + file->is_closed = 1; + } + mutex_unlock(&file->device->lists_mutex); + + if (file->async_file) + kref_put(&file->async_file->ref, ib_uverbs_release_event_file); + + kref_put(&file->ref, ib_uverbs_release_file); + kobject_put(&dev->kobj); + + return 0; +} + +static const struct file_operations uverbs_fops = { + .owner = THIS_MODULE, + .write = ib_uverbs_write, + .open = ib_uverbs_open, + .release = ib_uverbs_close, + .llseek = no_llseek, +}; + +static const struct file_operations uverbs_mmap_fops = { + .owner = THIS_MODULE, + .write = ib_uverbs_write, + .mmap = ib_uverbs_mmap, + .open = ib_uverbs_open, + .release = ib_uverbs_close, + .llseek = no_llseek, +}; + +static struct ib_client uverbs_client = { + .name = "uverbs", + .add = ib_uverbs_add_one, + .remove = ib_uverbs_remove_one +}; + +static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, + char *buf) +{ + int ret = -ENODEV; + int srcu_key; + struct ib_uverbs_device *dev = dev_get_drvdata(device); + struct ib_device *ib_dev; + + if (!dev) + return -ENODEV; + + srcu_key = srcu_read_lock(&dev->disassociate_srcu); + ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); + if (ib_dev) + ret = sprintf(buf, "%s\n", ib_dev->name); + srcu_read_unlock(&dev->disassociate_srcu, srcu_key); + + return ret; +} +static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); + +static ssize_t show_dev_abi_version(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct ib_uverbs_device *dev = dev_get_drvdata(device); + int ret = -ENODEV; + int srcu_key; + struct ib_device *ib_dev; + + if (!dev) + return -ENODEV; + srcu_key = srcu_read_lock(&dev->disassociate_srcu); + ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); + if (ib_dev) + ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver); + srcu_read_unlock(&dev->disassociate_srcu, srcu_key); + + return ret; +} +static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); + +static CLASS_ATTR_STRING(abi_version, S_IRUGO, + __stringify(IB_USER_VERBS_ABI_VERSION)); + +static dev_t overflow_maj; +static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES); + +/* + * If we 
have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by + * requesting a new major number and doubling the number of max devices we + * support. It's stupid, but simple. + */ +static int find_overflow_devnum(void) +{ + int ret; + + if (!overflow_maj) { + ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES, + "infiniband_verbs"); + if (ret) { + pr_err("user_verbs: couldn't register dynamic device number\n"); + return ret; + } + } + + ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES); + if (ret >= IB_UVERBS_MAX_DEVICES) + return -1; + + return ret; +} + +static ssize_t +show_dev_device(struct device *device, struct device_attribute *attr, char *buf) +{ + struct ib_uverbs_device *dev = dev_get_drvdata(device); + + if (!dev || !dev->ib_dev->dma_device) + return -ENODEV; + + return sprintf(buf, "0x%04x\n", + ((struct pci_dev *)dev->ib_dev->dma_device)->device); +} +static DEVICE_ATTR(device, S_IRUGO, show_dev_device, NULL); + +static ssize_t +show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf) +{ + struct ib_uverbs_device *dev = dev_get_drvdata(device); + + if (!dev || !dev->ib_dev->dma_device) + return -ENODEV; + + return sprintf(buf, "0x%04x\n", + ((struct pci_dev *)dev->ib_dev->dma_device)->vendor); +} +static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL); + +struct attribute *device_attrs[] = +{ + &dev_attr_device.attr, + &dev_attr_vendor.attr, + NULL +}; + +static struct attribute_group device_group = { + .name = "device", + .attrs = device_attrs +}; + +static void ib_uverbs_add_one(struct ib_device *device) +{ + int devnum; + dev_t base; + struct ib_uverbs_device *uverbs_dev; + int ret; + + if (!device->alloc_ucontext) + return; + + uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL); + if (!uverbs_dev) + return; + + ret = init_srcu_struct(&uverbs_dev->disassociate_srcu); + if (ret) { + kfree(uverbs_dev); + return; + } + + atomic_set(&uverbs_dev->refcount, 1); + init_completion(&uverbs_dev->comp); + uverbs_dev->xrcd_tree = RB_ROOT; + mutex_init(&uverbs_dev->xrcd_tree_mutex); + kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype); + mutex_init(&uverbs_dev->lists_mutex); + INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); + INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); + + spin_lock(&map_lock); + devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); + if (devnum >= IB_UVERBS_MAX_DEVICES) { + spin_unlock(&map_lock); + devnum = find_overflow_devnum(); + if (devnum < 0) + goto err; + + spin_lock(&map_lock); + uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES; + base = devnum + overflow_maj; + set_bit(devnum, overflow_map); + } else { + uverbs_dev->devnum = devnum; + base = devnum + IB_UVERBS_BASE_DEV; + set_bit(devnum, dev_map); + } + spin_unlock(&map_lock); + + rcu_assign_pointer(uverbs_dev->ib_dev, device); + uverbs_dev->num_comp_vectors = device->num_comp_vectors; + + cdev_init(&uverbs_dev->cdev, NULL); + uverbs_dev->cdev.owner = THIS_MODULE; + uverbs_dev->cdev.ops = device->mmap ? 
&uverbs_mmap_fops : &uverbs_fops; + uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj; + kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); + if (cdev_add(&uverbs_dev->cdev, base, 1)) + goto err_cdev; + + uverbs_dev->dev = device_create(uverbs_class, device->dma_device, + uverbs_dev->cdev.dev, uverbs_dev, + "uverbs%d", uverbs_dev->devnum); + if (IS_ERR(uverbs_dev->dev)) + goto err_cdev; + + if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) + goto err_class; + if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) + goto err_class; + if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group)) + goto err_class; + + ib_set_client_data(device, &uverbs_client, uverbs_dev); + + return; + +err_class: + device_destroy(uverbs_class, uverbs_dev->cdev.dev); + +err_cdev: + cdev_del(&uverbs_dev->cdev); + if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) + clear_bit(devnum, dev_map); + else + clear_bit(devnum, overflow_map); + +err: + if (atomic_dec_and_test(&uverbs_dev->refcount)) + ib_uverbs_comp_dev(uverbs_dev); + wait_for_completion(&uverbs_dev->comp); + kobject_put(&uverbs_dev->kobj); + return; +} + +static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, + struct ib_device *ib_dev) +{ + struct ib_uverbs_file *file; + struct ib_uverbs_event_file *event_file; + struct ib_event event; + + /* Pending running commands to terminate */ + synchronize_srcu(&uverbs_dev->disassociate_srcu); + event.event = IB_EVENT_DEVICE_FATAL; + event.element.port_num = 0; + event.device = ib_dev; + + mutex_lock(&uverbs_dev->lists_mutex); + while (!list_empty(&uverbs_dev->uverbs_file_list)) { + struct ib_ucontext *ucontext; + file = list_first_entry(&uverbs_dev->uverbs_file_list, + struct ib_uverbs_file, list); + file->is_closed = 1; + list_del(&file->list); + kref_get(&file->ref); + mutex_unlock(&uverbs_dev->lists_mutex); + + ib_uverbs_event_handler(&file->event_handler, &event); + + mutex_lock(&file->cleanup_mutex); + ucontext = file->ucontext; + file->ucontext = NULL; + mutex_unlock(&file->cleanup_mutex); + + /* At this point ib_uverbs_close cannot be running + * ib_uverbs_cleanup_ucontext + */ + if (ucontext) { + /* We must release the mutex before going ahead and + * calling disassociate_ucontext. disassociate_ucontext + * might end up indirectly calling uverbs_close, + * for example due to freeing the resources + * (e.g mmput). 
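 *
 * The general shape of the ordering enforced here (already implemented
 * in the surrounding code, restated only to make the locking rule
 * explicit) is:
 *
 *	kref_get(&file->ref);		// pin file across the unlock
 *	mutex_unlock(&uverbs_dev->lists_mutex);
 *	ib_dev->disassociate_ucontext(ucontext);	// may re-enter close paths
 *	mutex_lock(&uverbs_dev->lists_mutex);
 *	kref_put(&file->ref, ib_uverbs_release_file);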
+ */ + ib_dev->disassociate_ucontext(ucontext); + ib_uverbs_cleanup_ucontext(file, ucontext); + } + + mutex_lock(&uverbs_dev->lists_mutex); + kref_put(&file->ref, ib_uverbs_release_file); + } + + while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { + event_file = list_first_entry(&uverbs_dev-> + uverbs_events_file_list, + struct ib_uverbs_event_file, + list); + spin_lock_irq(&event_file->lock); + event_file->is_closed = 1; + spin_unlock_irq(&event_file->lock); + + list_del(&event_file->list); + if (event_file->is_async) { + ib_unregister_event_handler(&event_file->uverbs_file-> + event_handler); + event_file->uverbs_file->event_handler.device = NULL; + } + + wake_up_interruptible(&event_file->poll_wait); + linux_poll_wakeup(event_file->filp); + kill_fasync(&event_file->async_queue, SIGIO, POLL_IN); + } + mutex_unlock(&uverbs_dev->lists_mutex); +} + +static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) +{ + struct ib_uverbs_device *uverbs_dev = client_data; + int wait_clients = 1; + + if (!uverbs_dev) + return; + + sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group); + dev_set_drvdata(uverbs_dev->dev, NULL); + device_destroy(uverbs_class, uverbs_dev->cdev.dev); + cdev_del(&uverbs_dev->cdev); + + if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) + clear_bit(uverbs_dev->devnum, dev_map); + else + clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); + + if (device->disassociate_ucontext) { + /* We disassociate HW resources and immediately return. + * Userspace will see a EIO errno for all future access. + * Upon returning, ib_device may be freed internally and is not + * valid any more. + * uverbs_device is still available until all clients close + * their files, then the uverbs device ref count will be zero + * and its resources will be freed. + * Note: At this point no more files can be opened since the + * cdev was deleted, however active clients can still issue + * commands and close their open files. 
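 *
 * The EIO behaviour is a consequence of the SRCU protocol used on every
 * entry point (see ib_uverbs_write() and ib_uverbs_mmap() above):
 *
 *	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
 *	ib_dev = srcu_dereference(file->device->ib_dev,
 *				  &file->device->disassociate_srcu);
 *	if (!ib_dev)
 *		ret = -EIO;	// device was hot-unplugged
 *
 * Once ib_dev is reset to NULL below and disassociate_srcu has been
 * synchronized, no new command can reach the hardware.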
+ */ + rcu_assign_pointer(uverbs_dev->ib_dev, NULL); + ib_uverbs_free_hw_resources(uverbs_dev, device); + wait_clients = 0; + } + + if (atomic_dec_and_test(&uverbs_dev->refcount)) + ib_uverbs_comp_dev(uverbs_dev); + if (wait_clients) + wait_for_completion(&uverbs_dev->comp); + kobject_put(&uverbs_dev->kobj); +} + +static char *uverbs_devnode(struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0666; + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} + +static int __init ib_uverbs_init(void) +{ + int ret; + + ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, + "infiniband_verbs"); + if (ret) { + pr_err("user_verbs: couldn't register device number\n"); + goto out; + } + + uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); + if (IS_ERR(uverbs_class)) { + ret = PTR_ERR(uverbs_class); + pr_err("user_verbs: couldn't create class infiniband_verbs\n"); + goto out_chrdev; + } + + uverbs_class->devnode = uverbs_devnode; + + ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); + if (ret) { + pr_err("user_verbs: couldn't create abi_version attribute\n"); + goto out_class; + } + + ret = ib_register_client(&uverbs_client); + if (ret) { + pr_err("user_verbs: couldn't register client\n"); + goto out_class; + } + + return 0; + +out_class: + class_destroy(uverbs_class); + +out_chrdev: + unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); + +out: + return ret; +} + +static void __exit ib_uverbs_cleanup(void) +{ + ib_unregister_client(&uverbs_client); + class_destroy(uverbs_class); + unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); + if (overflow_maj) + unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); + idr_destroy(&ib_uverbs_pd_idr); + idr_destroy(&ib_uverbs_mr_idr); + idr_destroy(&ib_uverbs_mw_idr); + idr_destroy(&ib_uverbs_ah_idr); + idr_destroy(&ib_uverbs_cq_idr); + idr_destroy(&ib_uverbs_qp_idr); + idr_destroy(&ib_uverbs_srq_idr); +} + +module_init_order(ib_uverbs_init, SI_ORDER_THIRD); +module_exit(ib_uverbs_cleanup); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_marshall.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_marshall.c (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_marshall.c (revision 320592) @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, + struct ib_ah_attr *src) +{ + memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid); + dst->grh.flow_label = src->grh.flow_label; + dst->grh.sgid_index = src->grh.sgid_index; + dst->grh.hop_limit = src->grh.hop_limit; + dst->grh.traffic_class = src->grh.traffic_class; + memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); + dst->dlid = src->dlid; + dst->sl = src->sl; + dst->src_path_bits = src->src_path_bits; + dst->static_rate = src->static_rate; + dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0; + dst->port_num = src->port_num; + dst->reserved = 0; +} +EXPORT_SYMBOL(ib_copy_ah_attr_to_user); + +void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, + struct ib_qp_attr *src) +{ + dst->qp_state = src->qp_state; + dst->cur_qp_state = src->cur_qp_state; + dst->path_mtu = src->path_mtu; + dst->path_mig_state = src->path_mig_state; + dst->qkey = src->qkey; + dst->rq_psn = src->rq_psn; + dst->sq_psn = src->sq_psn; + dst->dest_qp_num = src->dest_qp_num; + dst->qp_access_flags = src->qp_access_flags; + + dst->max_send_wr = src->cap.max_send_wr; + dst->max_recv_wr = src->cap.max_recv_wr; + dst->max_send_sge = src->cap.max_send_sge; + dst->max_recv_sge = src->cap.max_recv_sge; + dst->max_inline_data = src->cap.max_inline_data; + + ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); + ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr); + + dst->pkey_index = src->pkey_index; + dst->alt_pkey_index = src->alt_pkey_index; + dst->en_sqd_async_notify = src->en_sqd_async_notify; + dst->sq_draining = src->sq_draining; + dst->max_rd_atomic = src->max_rd_atomic; + dst->max_dest_rd_atomic = src->max_dest_rd_atomic; + dst->min_rnr_timer = src->min_rnr_timer; + dst->port_num = src->port_num; + dst->timeout = src->timeout; + dst->retry_cnt = src->retry_cnt; + dst->rnr_retry = src->rnr_retry; + dst->alt_port_num = src->alt_port_num; + dst->alt_timeout = src->alt_timeout; + memset(dst->reserved, 0, sizeof(dst->reserved)); +} +EXPORT_SYMBOL(ib_copy_qp_attr_to_user); + +void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, + struct ib_sa_path_rec *src) +{ + memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid); + memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid); + + dst->dlid = src->dlid; + dst->slid = src->slid; + dst->raw_traffic = src->raw_traffic; + dst->flow_label = src->flow_label; + dst->hop_limit = src->hop_limit; + dst->traffic_class = src->traffic_class; + dst->reversible = src->reversible; + dst->numb_path = src->numb_path; + dst->pkey = src->pkey; + dst->sl = src->sl; + dst->mtu_selector = src->mtu_selector; + dst->mtu = src->mtu; + dst->rate_selector = src->rate_selector; + dst->rate = src->rate; + dst->packet_life_time = src->packet_life_time; + dst->preference = 
src->preference; + dst->packet_life_time_selector = src->packet_life_time_selector; +} +EXPORT_SYMBOL(ib_copy_path_rec_to_user); + +void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, + struct ib_user_path_rec *src) +{ + memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid); + memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid); + + dst->dlid = src->dlid; + dst->slid = src->slid; + dst->raw_traffic = src->raw_traffic; + dst->flow_label = src->flow_label; + dst->hop_limit = src->hop_limit; + dst->traffic_class = src->traffic_class; + dst->reversible = src->reversible; + dst->numb_path = src->numb_path; + dst->pkey = src->pkey; + dst->sl = src->sl; + dst->mtu_selector = src->mtu_selector; + dst->mtu = src->mtu; + dst->rate_selector = src->rate_selector; + dst->rate = src->rate; + dst->packet_life_time = src->packet_life_time; + dst->preference = src->preference; + dst->packet_life_time_selector = src->packet_life_time_selector; + + memset(dst->dmac, 0, sizeof(dst->dmac)); + dst->net = NULL; + dst->ifindex = 0; + dst->gid_type = IB_GID_TYPE_IB; +} +EXPORT_SYMBOL(ib_copy_path_rec_from_user); Property changes on: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_marshall.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_mad.h =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_mad.h (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_mad.h (nonexistent) @@ -1,245 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IB_USER_MAD_H -#define IB_USER_MAD_H - -#include -#include - -/* - * Increment this value if any changes that break userspace ABI - * compatibility are made. 
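 *
 * Userspace is expected to compare this constant against the ABI
 * version the running kernel exports before issuing any requests. A
 * hedged sketch of such a check (the sysfs path is the one
 * conventionally used by umad consumers and is an assumption here, not
 * something this header defines):
 *
 *	// Returns 0 when the kernel speaks the ABI we were built for.
 *	static int umad_abi_ok(void)
 *	{
 *		FILE *f = fopen("/sys/class/infiniband_mad/abi_version", "r");
 *		int v = -1;
 *
 *		if (f == NULL)
 *			return -1;
 *		if (fscanf(f, "%d", &v) != 1)
 *			v = -1;
 *		fclose(f);
 *		return (v == IB_USER_MAD_ABI_VERSION) ? 0 : -1;
 *	}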
- */ -#define IB_USER_MAD_ABI_VERSION 5 - -/* - * Make sure that all structs defined in this file remain laid out so - * that they pack the same way on 32-bit and 64-bit architectures (to - * avoid incompatibility between 32-bit userspace and 64-bit kernels). - */ - -/** - * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index - * @id - ID of agent MAD received with/to be sent with - * @status - 0 on successful receive, ETIMEDOUT if no response - * received (transaction ID in data[] will be set to TID of original - * request) (ignored on send) - * @timeout_ms - Milliseconds to wait for response (unset on receive) - * @retries - Number of automatic retries to attempt - * @qpn - Remote QP number received from/to be sent to - * @qkey - Remote Q_Key to be sent with (unset on receive) - * @lid - Remote lid received from/to be sent to - * @sl - Service level received with/to be sent with - * @path_bits - Local path bits received with/to be sent with - * @grh_present - If set, GRH was received/should be sent - * @gid_index - Local GID index to send with (unset on receive) - * @hop_limit - Hop limit in GRH - * @traffic_class - Traffic class in GRH - * @gid - Remote GID in GRH - * @flow_label - Flow label in GRH - */ -struct ib_user_mad_hdr_old { - __u32 id; - __u32 status; - __u32 timeout_ms; - __u32 retries; - __u32 length; - __be32 qpn; - __be32 qkey; - __be16 lid; - __u8 sl; - __u8 path_bits; - __u8 grh_present; - __u8 gid_index; - __u8 hop_limit; - __u8 traffic_class; - __u8 gid[16]; - __be32 flow_label; -}; - -/** - * ib_user_mad_hdr - MAD packet header - * This layout allows specifying/receiving the P_Key index. To use - * this capability, an application must call the - * IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before - * any other actions with the file handle. - * @id - ID of agent MAD received with/to be sent with - * @status - 0 on successful receive, ETIMEDOUT if no response - * received (transaction ID in data[] will be set to TID of original - * request) (ignored on send) - * @timeout_ms - Milliseconds to wait for response (unset on receive) - * @retries - Number of automatic retries to attempt - * @qpn - Remote QP number received from/to be sent to - * @qkey - Remote Q_Key to be sent with (unset on receive) - * @lid - Remote lid received from/to be sent to - * @sl - Service level received with/to be sent with - * @path_bits - Local path bits received with/to be sent with - * @grh_present - If set, GRH was received/should be sent - * @gid_index - Local GID index to send with (unset on receive) - * @hop_limit - Hop limit in GRH - * @traffic_class - Traffic class in GRH - * @gid - Remote GID in GRH - * @flow_label - Flow label in GRH - * @pkey_index - P_Key index - */ -struct ib_user_mad_hdr { - __u32 id; - __u32 status; - __u32 timeout_ms; - __u32 retries; - __u32 length; - __be32 qpn; - __be32 qkey; - __be16 lid; - __u8 sl; - __u8 path_bits; - __u8 grh_present; - __u8 gid_index; - __u8 hop_limit; - __u8 traffic_class; - __u8 gid[16]; - __be32 flow_label; - __u16 pkey_index; - __u8 reserved[6]; -}; - -/** - * ib_user_mad - MAD packet - * @hdr - MAD packet header - * @data - Contents of MAD - * - */ -struct ib_user_mad { - struct ib_user_mad_hdr hdr; - __u64 data[0]; -}; - -/* - * Earlier versions of this interface definition declared the - * method_mask[] member as an array of __u32 but treated it as a - * bitmap made up of longs in the kernel. 
This ambiguity meant that - * 32-bit big-endian applications that can run on both 32-bit and - * 64-bit kernels had no consistent ABI to rely on, and 64-bit - * big-endian applications that treated method_mask as being made up - * of 32-bit words would have their bitmap misinterpreted. - * - * To clear up this confusion, we change the declaration of - * method_mask[] to use unsigned long and handle the conversion from - * 32-bit userspace to 64-bit kernel for big-endian systems in the - * compat_ioctl method. Unfortunately, to keep the structure layout - * the same, we need the method_mask[] array to be aligned only to 4 - * bytes even when long is 64 bits, which forces us into this ugly - * typedef. - */ -typedef unsigned long __attribute__((aligned(4))) packed_ulong; -#define IB_USER_MAD_LONGS_PER_METHOD_MASK (128 / (8 * sizeof (long))) - -/** - * ib_user_mad_reg_req - MAD registration request - * @id - Set by the kernel; used to identify agent in future requests. - * @qpn - Queue pair number; must be 0 or 1. - * @method_mask - The caller will receive unsolicited MADs for any method - * where @method_mask = 1. - * @mgmt_class - Indicates which management class of MADs should be receive - * by the caller. This field is only required if the user wishes to - * receive unsolicited MADs, otherwise it should be 0. - * @mgmt_class_version - Indicates which version of MADs for the given - * management class to receive. - * @oui: Indicates IEEE OUI when mgmt_class is a vendor class - * in the range from 0x30 to 0x4f. Otherwise not used. - * @rmpp_version: If set, indicates the RMPP version used. - * - */ -struct ib_user_mad_reg_req { - __u32 id; - packed_ulong method_mask[IB_USER_MAD_LONGS_PER_METHOD_MASK]; - __u8 qpn; - __u8 mgmt_class; - __u8 mgmt_class_version; - __u8 oui[3]; - __u8 rmpp_version; -}; - -/** - * ib_user_mad_reg_req2 - MAD registration request - * - * @id - Set by the _kernel_; used by userspace to identify the - * registered agent in future requests. - * @qpn - Queue pair number; must be 0 or 1. - * @mgmt_class - Indicates which management class of MADs should be - * receive by the caller. This field is only required if - * the user wishes to receive unsolicited MADs, otherwise - * it should be 0. - * @mgmt_class_version - Indicates which version of MADs for the given - * management class to receive. - * @res - Ignored. - * @flags - additional registration flags; Must be in the set of - * flags defined in IB_USER_MAD_REG_FLAGS_CAP - * @method_mask - The caller wishes to receive unsolicited MADs for the - * methods whose bit(s) is(are) set. - * @oui - Indicates IEEE OUI to use when mgmt_class is a vendor - * class in the range from 0x30 to 0x4f. Otherwise not - * used. - * @rmpp_version - If set, indicates the RMPP version to use. 
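[Illustration: to make the registration interface above concrete, here is a minimal userspace sketch against this header. The device path, management class, and method bit are illustrative assumptions, and the small helper only shows how one method bit lands in the packed_ulong bitmap-of-longs described above; the IB_USER_MAD_REGISTER_AGENT ioctl it uses is defined a few lines further down in this header.]

    /*
     * Sketch only: open a umad device (path assumed) and register an
     * agent with the v1 request.  Error handling is abbreviated.
     */
    #include <fcntl.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <rdma/ib_user_mad.h>

    /* Hypothetical helper: set one method bit in the bitmap of longs. */
    static void method_mask_set(packed_ulong *mask, unsigned int method)
    {
            mask[method / (8 * sizeof(long))] |=
                1UL << (method % (8 * sizeof(long)));
    }

    int register_sa_agent(void)
    {
            struct ib_user_mad_reg_req req;
            int fd = open("/dev/umad0", O_RDWR);  /* path is an assumption */

            if (fd < 0)
                    return -1;
            memset(&req, 0, sizeof(req));
            req.qpn = 1;                    /* GSI queue pair; must be 0 or 1 */
            req.mgmt_class = 0x03;          /* Subnet Administration (assumed) */
            req.mgmt_class_version = 2;
            method_mask_set(req.method_mask, 0x12);  /* illustrative method */
            if (ioctl(fd, IB_USER_MAD_REGISTER_AGENT, &req) < 0) {
                    close(fd);
                    return -1;
            }
            return fd;                      /* req.id now identifies the agent */
    }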
- */ -enum { - IB_USER_MAD_USER_RMPP = (1 << 0), -}; -#define IB_USER_MAD_REG_FLAGS_CAP (IB_USER_MAD_USER_RMPP) -struct ib_user_mad_reg_req2 { - __u32 id; - __u32 qpn; - __u8 mgmt_class; - __u8 mgmt_class_version; - __u16 res; - __u32 flags; - __u64 method_mask[2]; - __u32 oui; - __u8 rmpp_version; - __u8 reserved[3]; -}; - -#define IB_IOCTL_MAGIC 0x1b - -#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \ - struct ib_user_mad_reg_req) - -#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) - -#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3) - -#define IB_USER_MAD_REGISTER_AGENT2 _IOWR(IB_IOCTL_MAGIC, 4, \ - struct ib_user_mad_reg_req2) - -#endif /* IB_USER_MAD_H */ Property changes on: projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_mad.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_sa.h =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_sa.h (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_sa.h (nonexistent) @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef IB_USER_SA_H -#define IB_USER_SA_H - -#include - -enum { - IB_PATH_GMP = 1, - IB_PATH_PRIMARY = (1<<1), - IB_PATH_ALTERNATE = (1<<2), - IB_PATH_OUTBOUND = (1<<3), - IB_PATH_INBOUND = (1<<4), - IB_PATH_INBOUND_REVERSE = (1<<5), - IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE -}; - -struct ib_path_rec_data { - __u32 flags; - __u32 reserved; - __u32 path_rec[16]; -}; - -struct ib_user_path_rec { - __u8 dgid[16]; - __u8 sgid[16]; - __be16 dlid; - __be16 slid; - __u32 raw_traffic; - __be32 flow_label; - __u32 reversible; - __u32 mtu; - __be16 pkey; - __u8 hop_limit; - __u8 traffic_class; - __u8 numb_path; - __u8 sl; - __u8 mtu_selector; - __u8 rate_selector; - __u8 rate; - __u8 packet_life_time_selector; - __u8 packet_life_time; - __u8 preference; -}; - -#endif /* IB_USER_SA_H */ Property changes on: projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_sa.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/include/rdma/rdma_user_cm.h =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/include/rdma/rdma_user_cm.h (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/include/rdma/rdma_user_cm.h (nonexistent) @@ -1,310 +0,0 @@ -/* - * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef RDMA_USER_CM_H -#define RDMA_USER_CM_H - -#include -#include -#include -#include -#include - -#define RDMA_USER_CM_ABI_VERSION 4 - -#define RDMA_MAX_PRIVATE_DATA 256 - -enum { - RDMA_USER_CM_CMD_CREATE_ID, - RDMA_USER_CM_CMD_DESTROY_ID, - RDMA_USER_CM_CMD_BIND_IP, - RDMA_USER_CM_CMD_RESOLVE_IP, - RDMA_USER_CM_CMD_RESOLVE_ROUTE, - RDMA_USER_CM_CMD_QUERY_ROUTE, - RDMA_USER_CM_CMD_CONNECT, - RDMA_USER_CM_CMD_LISTEN, - RDMA_USER_CM_CMD_ACCEPT, - RDMA_USER_CM_CMD_REJECT, - RDMA_USER_CM_CMD_DISCONNECT, - RDMA_USER_CM_CMD_INIT_QP_ATTR, - RDMA_USER_CM_CMD_GET_EVENT, - RDMA_USER_CM_CMD_GET_OPTION, - RDMA_USER_CM_CMD_SET_OPTION, - RDMA_USER_CM_CMD_NOTIFY, - RDMA_USER_CM_CMD_JOIN_IP_MCAST, - RDMA_USER_CM_CMD_LEAVE_MCAST, - RDMA_USER_CM_CMD_MIGRATE_ID, - RDMA_USER_CM_CMD_QUERY, - RDMA_USER_CM_CMD_BIND, - RDMA_USER_CM_CMD_RESOLVE_ADDR, - RDMA_USER_CM_CMD_JOIN_MCAST -}; - -/* - * command ABI structures. - */ -struct rdma_ucm_cmd_hdr { - __u32 cmd; - __u16 in; - __u16 out; -}; - -struct rdma_ucm_create_id { - __u64 uid; - __u64 response; - __u16 ps; - __u8 qp_type; - __u8 reserved[5]; -}; - -struct rdma_ucm_create_id_resp { - __u32 id; -}; - -struct rdma_ucm_destroy_id { - __u64 response; - __u32 id; - __u32 reserved; -}; - -struct rdma_ucm_destroy_id_resp { - __u32 events_reported; -}; - -struct rdma_ucm_bind_ip { - __u64 response; - struct sockaddr_in6 addr; - __u32 id; -}; - -struct rdma_ucm_bind { - __u32 id; - __u16 addr_size; - __u16 reserved; - struct sockaddr_storage addr; -}; - -struct rdma_ucm_resolve_ip { - struct sockaddr_in6 src_addr; - struct sockaddr_in6 dst_addr; - __u32 id; - __u32 timeout_ms; -}; - -struct rdma_ucm_resolve_addr { - __u32 id; - __u32 timeout_ms; - __u16 src_size; - __u16 dst_size; - __u32 reserved; - struct sockaddr_storage src_addr; - struct sockaddr_storage dst_addr; -}; - -struct rdma_ucm_resolve_route { - __u32 id; - __u32 timeout_ms; -}; - -enum { - RDMA_USER_CM_QUERY_ADDR, - RDMA_USER_CM_QUERY_PATH, - RDMA_USER_CM_QUERY_GID -}; - -struct rdma_ucm_query { - __u64 response; - __u32 id; - __u32 option; -}; - -struct rdma_ucm_query_route_resp { - __u64 node_guid; - struct ib_user_path_rec ib_route[2]; - struct sockaddr_in6 src_addr; - struct sockaddr_in6 dst_addr; - __u32 num_paths; - __u8 port_num; - __u8 reserved[3]; -}; - -struct rdma_ucm_query_addr_resp { - __u64 node_guid; - __u8 port_num; - __u8 reserved; - __u16 pkey; - __u16 src_size; - __u16 dst_size; - struct sockaddr_storage src_addr; - struct sockaddr_storage dst_addr; -}; - -struct rdma_ucm_query_path_resp { - __u32 num_paths; - __u32 reserved; - struct ib_path_rec_data path_data[0]; -}; - -struct rdma_ucm_conn_param { - __u32 qp_num; - __u32 qkey; - __u8 private_data[RDMA_MAX_PRIVATE_DATA]; - __u8 private_data_len; - __u8 srq; - __u8 responder_resources; - __u8 initiator_depth; - __u8 flow_control; - __u8 retry_count; - __u8 rnr_retry_count; - __u8 valid; -}; - -struct rdma_ucm_ud_param { - __u32 qp_num; - __u32 qkey; - struct ib_uverbs_ah_attr ah_attr; - __u8 private_data[RDMA_MAX_PRIVATE_DATA]; - __u8 private_data_len; - __u8 reserved[7]; -}; - -struct rdma_ucm_connect { - struct rdma_ucm_conn_param conn_param; - __u32 id; - __u32 reserved; -}; - -struct rdma_ucm_listen { - __u32 id; - __u32 backlog; -}; - -struct rdma_ucm_accept { - __u64 uid; - struct rdma_ucm_conn_param conn_param; - __u32 id; - __u32 reserved; -}; - -struct rdma_ucm_reject { - __u32 id; - __u8 private_data_len; - __u8 reserved[3]; - __u8 private_data[RDMA_MAX_PRIVATE_DATA]; -}; - -struct rdma_ucm_disconnect { - __u32 
id; -}; - -struct rdma_ucm_init_qp_attr { - __u64 response; - __u32 id; - __u32 qp_state; -}; - -struct rdma_ucm_notify { - __u32 id; - __u32 event; -}; - -struct rdma_ucm_join_ip_mcast { - __u64 response; /* rdma_ucm_create_id_resp */ - __u64 uid; - struct sockaddr_in6 addr; - __u32 id; -}; - -/* Multicast join flags */ -enum { - RDMA_MC_JOIN_FLAG_FULLMEMBER, - RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER, - RDMA_MC_JOIN_FLAG_RESERVED, -}; - -struct rdma_ucm_join_mcast { - __u64 response; /* rdma_ucma_create_id_resp */ - __u64 uid; - __u32 id; - __u16 addr_size; - __u16 join_flags; - struct sockaddr_storage addr; -}; - -struct rdma_ucm_get_event { - __u64 response; -}; - -struct rdma_ucm_event_resp { - __u64 uid; - __u32 id; - __u32 event; - __u32 status; - union { - struct rdma_ucm_conn_param conn; - struct rdma_ucm_ud_param ud; - } param; -}; - -/* Option levels */ -enum { - RDMA_OPTION_ID = 0, - RDMA_OPTION_IB = 1 -}; - -/* Option details */ -enum { - RDMA_OPTION_ID_TOS = 0, - RDMA_OPTION_ID_REUSEADDR = 1, - RDMA_OPTION_ID_AFONLY = 2, - RDMA_OPTION_IB_PATH = 1 -}; - -struct rdma_ucm_set_option { - __u64 optval; - __u32 id; - __u32 level; - __u32 optname; - __u32 optlen; -}; - -struct rdma_ucm_migrate_id { - __u64 response; - __u32 id; - __u32 fd; -}; - -struct rdma_ucm_migrate_resp { - __u32 events_reported; -}; - -#endif /* RDMA_USER_CM_H */ Property changes on: projects/bsd_rdma_4_9/sys/ofed/include/rdma/rdma_user_cm.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_verbs.h =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_verbs.h (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_verbs.h (nonexistent) @@ -1,1067 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. - * Copyright (c) 2005 PathScale, Inc. All rights reserved. - * Copyright (c) 2006 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef IB_USER_VERBS_H -#define IB_USER_VERBS_H - -#include - -/* - * Increment this value if any changes that break userspace ABI - * compatibility are made. - */ -#define IB_USER_VERBS_ABI_VERSION 6 -#define IB_USER_VERBS_CMD_THRESHOLD 50 - -enum { - IB_USER_VERBS_CMD_GET_CONTEXT, - IB_USER_VERBS_CMD_QUERY_DEVICE, - IB_USER_VERBS_CMD_QUERY_PORT, - IB_USER_VERBS_CMD_ALLOC_PD, - IB_USER_VERBS_CMD_DEALLOC_PD, - IB_USER_VERBS_CMD_CREATE_AH, - IB_USER_VERBS_CMD_MODIFY_AH, - IB_USER_VERBS_CMD_QUERY_AH, - IB_USER_VERBS_CMD_DESTROY_AH, - IB_USER_VERBS_CMD_REG_MR, - IB_USER_VERBS_CMD_REG_SMR, - IB_USER_VERBS_CMD_REREG_MR, - IB_USER_VERBS_CMD_QUERY_MR, - IB_USER_VERBS_CMD_DEREG_MR, - IB_USER_VERBS_CMD_ALLOC_MW, - IB_USER_VERBS_CMD_BIND_MW, - IB_USER_VERBS_CMD_DEALLOC_MW, - IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, - IB_USER_VERBS_CMD_CREATE_CQ, - IB_USER_VERBS_CMD_RESIZE_CQ, - IB_USER_VERBS_CMD_DESTROY_CQ, - IB_USER_VERBS_CMD_POLL_CQ, - IB_USER_VERBS_CMD_PEEK_CQ, - IB_USER_VERBS_CMD_REQ_NOTIFY_CQ, - IB_USER_VERBS_CMD_CREATE_QP, - IB_USER_VERBS_CMD_QUERY_QP, - IB_USER_VERBS_CMD_MODIFY_QP, - IB_USER_VERBS_CMD_DESTROY_QP, - IB_USER_VERBS_CMD_POST_SEND, - IB_USER_VERBS_CMD_POST_RECV, - IB_USER_VERBS_CMD_ATTACH_MCAST, - IB_USER_VERBS_CMD_DETACH_MCAST, - IB_USER_VERBS_CMD_CREATE_SRQ, - IB_USER_VERBS_CMD_MODIFY_SRQ, - IB_USER_VERBS_CMD_QUERY_SRQ, - IB_USER_VERBS_CMD_DESTROY_SRQ, - IB_USER_VERBS_CMD_POST_SRQ_RECV, - IB_USER_VERBS_CMD_OPEN_XRCD, - IB_USER_VERBS_CMD_CLOSE_XRCD, - IB_USER_VERBS_CMD_CREATE_XSRQ, - IB_USER_VERBS_CMD_OPEN_QP, -}; - -enum { - IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, - IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ, - IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP, - IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, - IB_USER_VERBS_EX_CMD_DESTROY_FLOW, - IB_USER_VERBS_EX_CMD_CREATE_WQ, - IB_USER_VERBS_EX_CMD_MODIFY_WQ, - IB_USER_VERBS_EX_CMD_DESTROY_WQ, - IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, - IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL -}; - -/* - * Make sure that all structs defined in this file remain laid out so - * that they pack the same way on 32-bit and 64-bit architectures (to - * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * Specifically: - * - Do not use pointer types -- pass pointers in __u64 instead. - * - Make sure that any structure larger than 4 bytes is padded to a - * multiple of 8 bytes. Otherwise the structure size will be - * different between 32-bit and 64-bit architectures. - */ - -struct ib_uverbs_async_event_desc { - __u64 element; - __u32 event_type; /* enum ib_event_type */ - __u32 reserved; -}; - -struct ib_uverbs_comp_event_desc { - __u64 cq_handle; -}; - -/* - * All commands from userspace should start with a __u32 command field - * followed by __u16 in_words and out_words fields (which give the - * length of the command block and response buffer if any in 32-bit - * words). The kernel driver will read these fields first and read - * the rest of the command struct based on these value. 
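[Illustration: the masks defined immediately after this comment show how an extended command is encoded in the same __u32 command word; a one-line sketch, using the CREATE_FLOW value from the enum above:]

    /* An extended command sets the EXTENDED flag in the high byte of
     * the command word (masks defined just below).  The cast avoids
     * shifting a signed 0x80 into the sign bit. */
    __u32 command = IB_USER_VERBS_EX_CMD_CREATE_FLOW |
                    ((__u32)IB_USER_VERBS_CMD_FLAG_EXTENDED <<
                     IB_USER_VERBS_CMD_FLAGS_SHIFT);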
- */ - -#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff -#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u -#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24 - -#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80 - -struct ib_uverbs_cmd_hdr { - __u32 command; - __u16 in_words; - __u16 out_words; -}; - -struct ib_uverbs_ex_cmd_hdr { - __u64 response; - __u16 provider_in_words; - __u16 provider_out_words; - __u32 cmd_hdr_reserved; -}; - -struct ib_uverbs_get_context { - __u64 response; - __u64 driver_data[0]; -}; - -struct ib_uverbs_get_context_resp { - __u32 async_fd; - __u32 num_comp_vectors; -}; - -struct ib_uverbs_query_device { - __u64 response; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_device_resp { - __u64 fw_ver; - __be64 node_guid; - __be64 sys_image_guid; - __u64 max_mr_size; - __u64 page_size_cap; - __u32 vendor_id; - __u32 vendor_part_id; - __u32 hw_ver; - __u32 max_qp; - __u32 max_qp_wr; - __u32 device_cap_flags; - __u32 max_sge; - __u32 max_sge_rd; - __u32 max_cq; - __u32 max_cqe; - __u32 max_mr; - __u32 max_pd; - __u32 max_qp_rd_atom; - __u32 max_ee_rd_atom; - __u32 max_res_rd_atom; - __u32 max_qp_init_rd_atom; - __u32 max_ee_init_rd_atom; - __u32 atomic_cap; - __u32 max_ee; - __u32 max_rdd; - __u32 max_mw; - __u32 max_raw_ipv6_qp; - __u32 max_raw_ethy_qp; - __u32 max_mcast_grp; - __u32 max_mcast_qp_attach; - __u32 max_total_mcast_qp_attach; - __u32 max_ah; - __u32 max_fmr; - __u32 max_map_per_fmr; - __u32 max_srq; - __u32 max_srq_wr; - __u32 max_srq_sge; - __u16 max_pkeys; - __u8 local_ca_ack_delay; - __u8 phys_port_cnt; - __u8 reserved[4]; -}; - -struct ib_uverbs_ex_query_device { - __u32 comp_mask; - __u32 reserved; -}; - -struct ib_uverbs_odp_caps { - __u64 general_caps; - struct { - __u32 rc_odp_caps; - __u32 uc_odp_caps; - __u32 ud_odp_caps; - } per_transport_caps; - __u32 reserved; -}; - -struct ib_uverbs_rss_caps { - /* Corresponding bit will be set if qp type from - * 'enum ib_qp_type' is supported, e.g. 
- * supported_qpts |= 1 << IB_QPT_UD - */ - __u32 supported_qpts; - __u32 max_rwq_indirection_tables; - __u32 max_rwq_indirection_table_size; - __u32 reserved; -}; - -struct ib_uverbs_ex_query_device_resp { - struct ib_uverbs_query_device_resp base; - __u32 comp_mask; - __u32 response_length; - struct ib_uverbs_odp_caps odp_caps; - __u64 timestamp_mask; - __u64 hca_core_clock; /* in KHZ */ - __u64 device_cap_flags_ex; - struct ib_uverbs_rss_caps rss_caps; - __u32 max_wq_type_rq; - __u32 reserved; -}; - -struct ib_uverbs_query_port { - __u64 response; - __u8 port_num; - __u8 reserved[7]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_port_resp { - __u32 port_cap_flags; - __u32 max_msg_sz; - __u32 bad_pkey_cntr; - __u32 qkey_viol_cntr; - __u32 gid_tbl_len; - __u16 pkey_tbl_len; - __u16 lid; - __u16 sm_lid; - __u8 state; - __u8 max_mtu; - __u8 active_mtu; - __u8 lmc; - __u8 max_vl_num; - __u8 sm_sl; - __u8 subnet_timeout; - __u8 init_type_reply; - __u8 active_width; - __u8 active_speed; - __u8 phys_state; - __u8 link_layer; - __u8 reserved[2]; -}; - -struct ib_uverbs_alloc_pd { - __u64 response; - __u64 driver_data[0]; -}; - -struct ib_uverbs_alloc_pd_resp { - __u32 pd_handle; -}; - -struct ib_uverbs_dealloc_pd { - __u32 pd_handle; -}; - -struct ib_uverbs_open_xrcd { - __u64 response; - __u32 fd; - __u32 oflags; - __u64 driver_data[0]; -}; - -struct ib_uverbs_open_xrcd_resp { - __u32 xrcd_handle; -}; - -struct ib_uverbs_close_xrcd { - __u32 xrcd_handle; -}; - -struct ib_uverbs_reg_mr { - __u64 response; - __u64 start; - __u64 length; - __u64 hca_va; - __u32 pd_handle; - __u32 access_flags; - __u64 driver_data[0]; -}; - -struct ib_uverbs_reg_mr_resp { - __u32 mr_handle; - __u32 lkey; - __u32 rkey; -}; - -struct ib_uverbs_rereg_mr { - __u64 response; - __u32 mr_handle; - __u32 flags; - __u64 start; - __u64 length; - __u64 hca_va; - __u32 pd_handle; - __u32 access_flags; -}; - -struct ib_uverbs_rereg_mr_resp { - __u32 lkey; - __u32 rkey; -}; - -struct ib_uverbs_dereg_mr { - __u32 mr_handle; -}; - -struct ib_uverbs_alloc_mw { - __u64 response; - __u32 pd_handle; - __u8 mw_type; - __u8 reserved[3]; -}; - -struct ib_uverbs_alloc_mw_resp { - __u32 mw_handle; - __u32 rkey; -}; - -struct ib_uverbs_dealloc_mw { - __u32 mw_handle; -}; - -struct ib_uverbs_create_comp_channel { - __u64 response; -}; - -struct ib_uverbs_create_comp_channel_resp { - __u32 fd; -}; - -struct ib_uverbs_create_cq { - __u64 response; - __u64 user_handle; - __u32 cqe; - __u32 comp_vector; - __s32 comp_channel; - __u32 reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_ex_create_cq { - __u64 user_handle; - __u32 cqe; - __u32 comp_vector; - __s32 comp_channel; - __u32 comp_mask; - __u32 flags; - __u32 reserved; -}; - -struct ib_uverbs_create_cq_resp { - __u32 cq_handle; - __u32 cqe; -}; - -struct ib_uverbs_ex_create_cq_resp { - struct ib_uverbs_create_cq_resp base; - __u32 comp_mask; - __u32 response_length; -}; - -struct ib_uverbs_resize_cq { - __u64 response; - __u32 cq_handle; - __u32 cqe; - __u64 driver_data[0]; -}; - -struct ib_uverbs_resize_cq_resp { - __u32 cqe; - __u32 reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_poll_cq { - __u64 response; - __u32 cq_handle; - __u32 ne; -}; - -struct ib_uverbs_wc { - __u64 wr_id; - __u32 status; - __u32 opcode; - __u32 vendor_err; - __u32 byte_len; - union { - __u32 imm_data; - __u32 invalidate_rkey; - } ex; - __u32 qp_num; - __u32 src_qp; - __u32 wc_flags; - __u16 pkey_index; - __u16 slid; - __u8 sl; - __u8 dlid_path_bits; - __u8 port_num; - __u8 reserved; -}; - 
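[Illustration: a hedged sketch of the command convention described earlier in this header (lengths counted in 32-bit words, userspace pointers carried in __u64 response fields), applied to POLL_CQ. "fd" and "cq_handle" are assumed to come from earlier GET_CONTEXT and CREATE_CQ exchanges, and the response parsing relies on the ib_uverbs_poll_cq_resp layout that follows.]

    /*
     * Sketch only: poll up to 8 completions through the write() ABI.
     */
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <rdma/ib_user_verbs.h>

    int poll_cq_once(int fd, __u32 cq_handle)
    {
            struct {
                    struct ib_uverbs_cmd_hdr hdr;
                    struct ib_uverbs_poll_cq cmd;
            } req;
            struct {
                    struct ib_uverbs_poll_cq_resp resp;
                    struct ib_uverbs_wc wc[8];      /* room for 8 entries */
            } out;

            memset(&req, 0, sizeof(req));
            req.hdr.command   = IB_USER_VERBS_CMD_POLL_CQ;
            req.hdr.in_words  = sizeof(req) / 4;    /* 32-bit words */
            req.hdr.out_words = sizeof(out) / 4;
            req.cmd.response  = (uintptr_t)&out;    /* pointer as __u64 */
            req.cmd.cq_handle = cq_handle;
            req.cmd.ne        = 8;

            if (write(fd, &req, sizeof(req)) != (ssize_t)sizeof(req))
                    return -1;
            return (int)out.resp.count;     /* valid entries in out.wc[] */
    }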
-struct ib_uverbs_poll_cq_resp { - __u32 count; - __u32 reserved; - struct ib_uverbs_wc wc[0]; -}; - -struct ib_uverbs_req_notify_cq { - __u32 cq_handle; - __u32 solicited_only; -}; - -struct ib_uverbs_destroy_cq { - __u64 response; - __u32 cq_handle; - __u32 reserved; -}; - -struct ib_uverbs_destroy_cq_resp { - __u32 comp_events_reported; - __u32 async_events_reported; -}; - -struct ib_uverbs_global_route { - __u8 dgid[16]; - __u32 flow_label; - __u8 sgid_index; - __u8 hop_limit; - __u8 traffic_class; - __u8 reserved; -}; - -struct ib_uverbs_ah_attr { - struct ib_uverbs_global_route grh; - __u16 dlid; - __u8 sl; - __u8 src_path_bits; - __u8 static_rate; - __u8 is_global; - __u8 port_num; - __u8 reserved; -}; - -struct ib_uverbs_qp_attr { - __u32 qp_attr_mask; - __u32 qp_state; - __u32 cur_qp_state; - __u32 path_mtu; - __u32 path_mig_state; - __u32 qkey; - __u32 rq_psn; - __u32 sq_psn; - __u32 dest_qp_num; - __u32 qp_access_flags; - - struct ib_uverbs_ah_attr ah_attr; - struct ib_uverbs_ah_attr alt_ah_attr; - - /* ib_qp_cap */ - __u32 max_send_wr; - __u32 max_recv_wr; - __u32 max_send_sge; - __u32 max_recv_sge; - __u32 max_inline_data; - - __u16 pkey_index; - __u16 alt_pkey_index; - __u8 en_sqd_async_notify; - __u8 sq_draining; - __u8 max_rd_atomic; - __u8 max_dest_rd_atomic; - __u8 min_rnr_timer; - __u8 port_num; - __u8 timeout; - __u8 retry_cnt; - __u8 rnr_retry; - __u8 alt_port_num; - __u8 alt_timeout; - __u8 reserved[5]; -}; - -struct ib_uverbs_create_qp { - __u64 response; - __u64 user_handle; - __u32 pd_handle; - __u32 send_cq_handle; - __u32 recv_cq_handle; - __u32 srq_handle; - __u32 max_send_wr; - __u32 max_recv_wr; - __u32 max_send_sge; - __u32 max_recv_sge; - __u32 max_inline_data; - __u8 sq_sig_all; - __u8 qp_type; - __u8 is_srq; - __u8 reserved; - __u64 driver_data[0]; -}; - -enum ib_uverbs_create_qp_mask { - IB_UVERBS_CREATE_QP_MASK_IND_TABLE = 1UL << 0, -}; - -enum { - IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, -}; - -struct ib_uverbs_ex_create_qp { - __u64 user_handle; - __u32 pd_handle; - __u32 send_cq_handle; - __u32 recv_cq_handle; - __u32 srq_handle; - __u32 max_send_wr; - __u32 max_recv_wr; - __u32 max_send_sge; - __u32 max_recv_sge; - __u32 max_inline_data; - __u8 sq_sig_all; - __u8 qp_type; - __u8 is_srq; - __u8 reserved; - __u32 comp_mask; - __u32 create_flags; - __u32 rwq_ind_tbl_handle; - __u32 reserved1; -}; - -struct ib_uverbs_open_qp { - __u64 response; - __u64 user_handle; - __u32 pd_handle; - __u32 qpn; - __u8 qp_type; - __u8 reserved[7]; - __u64 driver_data[0]; -}; - -/* also used for open response */ -struct ib_uverbs_create_qp_resp { - __u32 qp_handle; - __u32 qpn; - __u32 max_send_wr; - __u32 max_recv_wr; - __u32 max_send_sge; - __u32 max_recv_sge; - __u32 max_inline_data; - __u32 reserved; -}; - -struct ib_uverbs_ex_create_qp_resp { - struct ib_uverbs_create_qp_resp base; - __u32 comp_mask; - __u32 response_length; -}; - -/* - * This struct needs to remain a multiple of 8 bytes to keep the - * alignment of the modify QP parameters. 
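[Illustration: that invariant can be guarded at compile time; a one-line C11 sketch for the struct defined right after this comment:]

    /* Compile-time check of the documented 8-byte-multiple invariant. */
    _Static_assert(sizeof(struct ib_uverbs_qp_dest) % 8 == 0,
                   "ib_uverbs_qp_dest must stay a multiple of 8 bytes");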
- */ -struct ib_uverbs_qp_dest { - __u8 dgid[16]; - __u32 flow_label; - __u16 dlid; - __u16 reserved; - __u8 sgid_index; - __u8 hop_limit; - __u8 traffic_class; - __u8 sl; - __u8 src_path_bits; - __u8 static_rate; - __u8 is_global; - __u8 port_num; -}; - -struct ib_uverbs_query_qp { - __u64 response; - __u32 qp_handle; - __u32 attr_mask; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_qp_resp { - struct ib_uverbs_qp_dest dest; - struct ib_uverbs_qp_dest alt_dest; - __u32 max_send_wr; - __u32 max_recv_wr; - __u32 max_send_sge; - __u32 max_recv_sge; - __u32 max_inline_data; - __u32 qkey; - __u32 rq_psn; - __u32 sq_psn; - __u32 dest_qp_num; - __u32 qp_access_flags; - __u16 pkey_index; - __u16 alt_pkey_index; - __u8 qp_state; - __u8 cur_qp_state; - __u8 path_mtu; - __u8 path_mig_state; - __u8 sq_draining; - __u8 max_rd_atomic; - __u8 max_dest_rd_atomic; - __u8 min_rnr_timer; - __u8 port_num; - __u8 timeout; - __u8 retry_cnt; - __u8 rnr_retry; - __u8 alt_port_num; - __u8 alt_timeout; - __u8 sq_sig_all; - __u8 reserved[5]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_modify_qp { - struct ib_uverbs_qp_dest dest; - struct ib_uverbs_qp_dest alt_dest; - __u32 qp_handle; - __u32 attr_mask; - __u32 qkey; - __u32 rq_psn; - __u32 sq_psn; - __u32 dest_qp_num; - __u32 qp_access_flags; - __u16 pkey_index; - __u16 alt_pkey_index; - __u8 qp_state; - __u8 cur_qp_state; - __u8 path_mtu; - __u8 path_mig_state; - __u8 en_sqd_async_notify; - __u8 max_rd_atomic; - __u8 max_dest_rd_atomic; - __u8 min_rnr_timer; - __u8 port_num; - __u8 timeout; - __u8 retry_cnt; - __u8 rnr_retry; - __u8 alt_port_num; - __u8 alt_timeout; - __u8 reserved[2]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_modify_qp_resp { -}; - -struct ib_uverbs_destroy_qp { - __u64 response; - __u32 qp_handle; - __u32 reserved; -}; - -struct ib_uverbs_destroy_qp_resp { - __u32 events_reported; -}; - -/* - * The ib_uverbs_sge structure isn't used anywhere, since we assume - * the ib_sge structure is packed the same way on 32-bit and 64-bit - * architectures in both kernel and user space. It's just here to - * document the ABI. 
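[Illustration: the send path defined below is variable-length on the wire; the fixed ib_uverbs_post_send command is followed by wr_count work requests (wqe_size bytes each) and then the scatter/gather entries. A sizing sketch, under the assumption that all SGEs trail all WRs:]

    /* Sketch: bytes written for a POST_SEND carrying the given counts. */
    #include <stddef.h>
    #include <rdma/ib_user_verbs.h>

    static size_t post_send_bytes(unsigned int wr_count, unsigned int sge_count)
    {
            return sizeof(struct ib_uverbs_cmd_hdr) +
                   sizeof(struct ib_uverbs_post_send) +
                   wr_count  * sizeof(struct ib_uverbs_send_wr) +
                   sge_count * sizeof(struct ib_uverbs_sge);
    }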
- */ -struct ib_uverbs_sge { - __u64 addr; - __u32 length; - __u32 lkey; -}; - -struct ib_uverbs_send_wr { - __u64 wr_id; - __u32 num_sge; - __u32 opcode; - __u32 send_flags; - union { - __u32 imm_data; - __u32 invalidate_rkey; - } ex; - union { - struct { - __u64 remote_addr; - __u32 rkey; - __u32 reserved; - } rdma; - struct { - __u64 remote_addr; - __u64 compare_add; - __u64 swap; - __u32 rkey; - __u32 reserved; - } atomic; - struct { - __u32 ah; - __u32 remote_qpn; - __u32 remote_qkey; - __u32 reserved; - } ud; - } wr; -}; - -struct ib_uverbs_post_send { - __u64 response; - __u32 qp_handle; - __u32 wr_count; - __u32 sge_count; - __u32 wqe_size; - struct ib_uverbs_send_wr send_wr[0]; -}; - -struct ib_uverbs_post_send_resp { - __u32 bad_wr; -}; - -struct ib_uverbs_recv_wr { - __u64 wr_id; - __u32 num_sge; - __u32 reserved; -}; - -struct ib_uverbs_post_recv { - __u64 response; - __u32 qp_handle; - __u32 wr_count; - __u32 sge_count; - __u32 wqe_size; - struct ib_uverbs_recv_wr recv_wr[0]; -}; - -struct ib_uverbs_post_recv_resp { - __u32 bad_wr; -}; - -struct ib_uverbs_post_srq_recv { - __u64 response; - __u32 srq_handle; - __u32 wr_count; - __u32 sge_count; - __u32 wqe_size; - struct ib_uverbs_recv_wr recv[0]; -}; - -struct ib_uverbs_post_srq_recv_resp { - __u32 bad_wr; -}; - -struct ib_uverbs_create_ah { - __u64 response; - __u64 user_handle; - __u32 pd_handle; - __u32 reserved; - struct ib_uverbs_ah_attr attr; -}; - -struct ib_uverbs_create_ah_resp { - __u32 ah_handle; -}; - -struct ib_uverbs_destroy_ah { - __u32 ah_handle; -}; - -struct ib_uverbs_attach_mcast { - __u8 gid[16]; - __u32 qp_handle; - __u16 mlid; - __u16 reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_detach_mcast { - __u8 gid[16]; - __u32 qp_handle; - __u16 mlid; - __u16 reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_flow_spec_hdr { - __u32 type; - __u16 size; - __u16 reserved; - /* followed by flow_spec */ - __u64 flow_spec_data[0]; -}; - -struct ib_uverbs_flow_eth_filter { - __u8 dst_mac[6]; - __u8 src_mac[6]; - __be16 ether_type; - __be16 vlan_tag; -}; - -struct ib_uverbs_flow_spec_eth { - union { - struct ib_uverbs_flow_spec_hdr hdr; - struct { - __u32 type; - __u16 size; - __u16 reserved; - }; - }; - struct ib_uverbs_flow_eth_filter val; - struct ib_uverbs_flow_eth_filter mask; -}; - -struct ib_uverbs_flow_ipv4_filter { - __be32 src_ip; - __be32 dst_ip; - __u8 proto; - __u8 tos; - __u8 ttl; - __u8 flags; -}; - -struct ib_uverbs_flow_spec_ipv4 { - union { - struct ib_uverbs_flow_spec_hdr hdr; - struct { - __u32 type; - __u16 size; - __u16 reserved; - }; - }; - struct ib_uverbs_flow_ipv4_filter val; - struct ib_uverbs_flow_ipv4_filter mask; -}; - -struct ib_uverbs_flow_tcp_udp_filter { - __be16 dst_port; - __be16 src_port; -}; - -struct ib_uverbs_flow_spec_tcp_udp { - union { - struct ib_uverbs_flow_spec_hdr hdr; - struct { - __u32 type; - __u16 size; - __u16 reserved; - }; - }; - struct ib_uverbs_flow_tcp_udp_filter val; - struct ib_uverbs_flow_tcp_udp_filter mask; -}; - -struct ib_uverbs_flow_ipv6_filter { - __u8 src_ip[16]; - __u8 dst_ip[16]; - __be32 flow_label; - __u8 next_hdr; - __u8 traffic_class; - __u8 hop_limit; - __u8 reserved; -}; - -struct ib_uverbs_flow_spec_ipv6 { - union { - struct ib_uverbs_flow_spec_hdr hdr; - struct { - __u32 type; - __u16 size; - __u16 reserved; - }; - }; - struct ib_uverbs_flow_ipv6_filter val; - struct ib_uverbs_flow_ipv6_filter mask; -}; - -struct ib_uverbs_flow_attr { - __u32 type; - __u16 size; - __u16 priority; - __u8 num_of_specs; - __u8 reserved[2]; - 
__u8 port; - __u32 flags; - /* Following are the optional layers according to user request - * struct ib_flow_spec_xxx - * struct ib_flow_spec_yyy - */ - struct ib_uverbs_flow_spec_hdr flow_specs[0]; -}; - -struct ib_uverbs_create_flow { - __u32 comp_mask; - __u32 qp_handle; - struct ib_uverbs_flow_attr flow_attr; -}; - -struct ib_uverbs_create_flow_resp { - __u32 comp_mask; - __u32 flow_handle; -}; - -struct ib_uverbs_destroy_flow { - __u32 comp_mask; - __u32 flow_handle; -}; - -struct ib_uverbs_create_srq { - __u64 response; - __u64 user_handle; - __u32 pd_handle; - __u32 max_wr; - __u32 max_sge; - __u32 srq_limit; - __u64 driver_data[0]; -}; - -struct ib_uverbs_create_xsrq { - __u64 response; - __u64 user_handle; - __u32 srq_type; - __u32 pd_handle; - __u32 max_wr; - __u32 max_sge; - __u32 srq_limit; - __u32 reserved; - __u32 xrcd_handle; - __u32 cq_handle; - __u64 driver_data[0]; -}; - -struct ib_uverbs_create_srq_resp { - __u32 srq_handle; - __u32 max_wr; - __u32 max_sge; - __u32 srqn; -}; - -struct ib_uverbs_modify_srq { - __u32 srq_handle; - __u32 attr_mask; - __u32 max_wr; - __u32 srq_limit; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_srq { - __u64 response; - __u32 srq_handle; - __u32 reserved; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_srq_resp { - __u32 max_wr; - __u32 max_sge; - __u32 srq_limit; - __u32 reserved; -}; - -struct ib_uverbs_destroy_srq { - __u64 response; - __u32 srq_handle; - __u32 reserved; -}; - -struct ib_uverbs_destroy_srq_resp { - __u32 events_reported; -}; - -struct ib_uverbs_ex_create_wq { - __u32 comp_mask; - __u32 wq_type; - __u64 user_handle; - __u32 pd_handle; - __u32 cq_handle; - __u32 max_wr; - __u32 max_sge; -}; - -struct ib_uverbs_ex_create_wq_resp { - __u32 comp_mask; - __u32 response_length; - __u32 wq_handle; - __u32 max_wr; - __u32 max_sge; - __u32 wqn; -}; - -struct ib_uverbs_ex_destroy_wq { - __u32 comp_mask; - __u32 wq_handle; -}; - -struct ib_uverbs_ex_destroy_wq_resp { - __u32 comp_mask; - __u32 response_length; - __u32 events_reported; - __u32 reserved; -}; - -struct ib_uverbs_ex_modify_wq { - __u32 attr_mask; - __u32 wq_handle; - __u32 wq_state; - __u32 curr_wq_state; -}; - -/* Prevent memory allocation rather than max expected size */ -#define IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE 0x0d -struct ib_uverbs_ex_create_rwq_ind_table { - __u32 comp_mask; - __u32 log_ind_tbl_size; - /* Following are the wq handles according to log_ind_tbl_size - * wq_handle1 - * wq_handle2 - */ - __u32 wq_handles[0]; -}; - -struct ib_uverbs_ex_create_rwq_ind_table_resp { - __u32 comp_mask; - __u32 response_length; - __u32 ind_tbl_handle; - __u32 ind_tbl_num; -}; - -struct ib_uverbs_ex_destroy_rwq_ind_table { - __u32 comp_mask; - __u32 ind_tbl_handle; -}; - -#endif /* IB_USER_VERBS_H */ Property changes on: projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_verbs.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_cm.h =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_cm.h (revision 320591) +++ projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_cm.h (nonexistent) @@ -1,325 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IB_USER_CM_H -#define IB_USER_CM_H - -#include -#include - -#define IB_USER_CM_ABI_VERSION 5 - -enum { - IB_USER_CM_CMD_CREATE_ID, - IB_USER_CM_CMD_DESTROY_ID, - IB_USER_CM_CMD_ATTR_ID, - - IB_USER_CM_CMD_LISTEN, - IB_USER_CM_CMD_NOTIFY, - - IB_USER_CM_CMD_SEND_REQ, - IB_USER_CM_CMD_SEND_REP, - IB_USER_CM_CMD_SEND_RTU, - IB_USER_CM_CMD_SEND_DREQ, - IB_USER_CM_CMD_SEND_DREP, - IB_USER_CM_CMD_SEND_REJ, - IB_USER_CM_CMD_SEND_MRA, - IB_USER_CM_CMD_SEND_LAP, - IB_USER_CM_CMD_SEND_APR, - IB_USER_CM_CMD_SEND_SIDR_REQ, - IB_USER_CM_CMD_SEND_SIDR_REP, - - IB_USER_CM_CMD_EVENT, - IB_USER_CM_CMD_INIT_QP_ATTR, -}; -/* - * command ABI structures. 
- */ -struct ib_ucm_cmd_hdr { - __u32 cmd; - __u16 in; - __u16 out; -}; - -struct ib_ucm_create_id { - __u64 uid; - __u64 response; -}; - -struct ib_ucm_create_id_resp { - __u32 id; -}; - -struct ib_ucm_destroy_id { - __u64 response; - __u32 id; - __u32 reserved; -}; - -struct ib_ucm_destroy_id_resp { - __u32 events_reported; -}; - -struct ib_ucm_attr_id { - __u64 response; - __u32 id; - __u32 reserved; -}; - -struct ib_ucm_attr_id_resp { - __be64 service_id; - __be64 service_mask; - __be32 local_id; - __be32 remote_id; -}; - -struct ib_ucm_init_qp_attr { - __u64 response; - __u32 id; - __u32 qp_state; -}; - -struct ib_ucm_listen { - __be64 service_id; - __be64 service_mask; - __u32 id; - __u32 reserved; -}; - -struct ib_ucm_notify { - __u32 id; - __u32 event; -}; - -struct ib_ucm_private_data { - __u64 data; - __u32 id; - __u8 len; - __u8 reserved[3]; -}; - -struct ib_ucm_req { - __u32 id; - __u32 qpn; - __u32 qp_type; - __u32 psn; - __be64 sid; - __u64 data; - __u64 primary_path; - __u64 alternate_path; - __u8 len; - __u8 peer_to_peer; - __u8 responder_resources; - __u8 initiator_depth; - __u8 remote_cm_response_timeout; - __u8 flow_control; - __u8 local_cm_response_timeout; - __u8 retry_count; - __u8 rnr_retry_count; - __u8 max_cm_retries; - __u8 srq; - __u8 reserved[5]; -}; - -struct ib_ucm_rep { - __u64 uid; - __u64 data; - __u32 id; - __u32 qpn; - __u32 psn; - __u8 len; - __u8 responder_resources; - __u8 initiator_depth; - __u8 target_ack_delay; - __u8 failover_accepted; - __u8 flow_control; - __u8 rnr_retry_count; - __u8 srq; - __u8 reserved[4]; -}; - -struct ib_ucm_info { - __u32 id; - __u32 status; - __u64 info; - __u64 data; - __u8 info_len; - __u8 data_len; - __u8 reserved[6]; -}; - -struct ib_ucm_mra { - __u64 data; - __u32 id; - __u8 len; - __u8 timeout; - __u8 reserved[2]; -}; - -struct ib_ucm_lap { - __u64 path; - __u64 data; - __u32 id; - __u8 len; - __u8 reserved[3]; -}; - -struct ib_ucm_sidr_req { - __u32 id; - __u32 timeout; - __be64 sid; - __u64 data; - __u64 path; - __u16 reserved_pkey; - __u8 len; - __u8 max_cm_retries; - __u8 reserved[4]; -}; - -struct ib_ucm_sidr_rep { - __u32 id; - __u32 qpn; - __u32 qkey; - __u32 status; - __u64 info; - __u64 data; - __u8 info_len; - __u8 data_len; - __u8 reserved[6]; -}; -/* - * event notification ABI structures. - */ -struct ib_ucm_event_get { - __u64 response; - __u64 data; - __u64 info; - __u8 data_len; - __u8 info_len; - __u8 reserved[6]; -}; - -struct ib_ucm_req_event_resp { - struct ib_user_path_rec primary_path; - struct ib_user_path_rec alternate_path; - __be64 remote_ca_guid; - __u32 remote_qkey; - __u32 remote_qpn; - __u32 qp_type; - __u32 starting_psn; - __u8 responder_resources; - __u8 initiator_depth; - __u8 local_cm_response_timeout; - __u8 flow_control; - __u8 remote_cm_response_timeout; - __u8 retry_count; - __u8 rnr_retry_count; - __u8 srq; - __u8 port; - __u8 reserved[7]; -}; - -struct ib_ucm_rep_event_resp { - __be64 remote_ca_guid; - __u32 remote_qkey; - __u32 remote_qpn; - __u32 starting_psn; - __u8 responder_resources; - __u8 initiator_depth; - __u8 target_ack_delay; - __u8 failover_accepted; - __u8 flow_control; - __u8 rnr_retry_count; - __u8 srq; - __u8 reserved[5]; -}; - -struct ib_ucm_rej_event_resp { - __u32 reason; - /* ari in ib_ucm_event_get info field. */ -}; - -struct ib_ucm_mra_event_resp { - __u8 timeout; - __u8 reserved[3]; -}; - -struct ib_ucm_lap_event_resp { - struct ib_user_path_rec path; -}; - -struct ib_ucm_apr_event_resp { - __u32 status; - /* apr info in ib_ucm_event_get info field. 
*/ -}; - -struct ib_ucm_sidr_req_event_resp { - __u16 pkey; - __u8 port; - __u8 reserved; -}; - -struct ib_ucm_sidr_rep_event_resp { - __u32 status; - __u32 qkey; - __u32 qpn; - /* info in ib_ucm_event_get info field. */ -}; - -#define IB_UCM_PRES_DATA 0x01 -#define IB_UCM_PRES_INFO 0x02 -#define IB_UCM_PRES_PRIMARY 0x04 -#define IB_UCM_PRES_ALTERNATE 0x08 - -struct ib_ucm_event_resp { - __u64 uid; - __u32 id; - __u32 event; - __u32 present; - __u32 reserved; - union { - struct ib_ucm_req_event_resp req_resp; - struct ib_ucm_rep_event_resp rep_resp; - struct ib_ucm_rej_event_resp rej_resp; - struct ib_ucm_mra_event_resp mra_resp; - struct ib_ucm_lap_event_resp lap_resp; - struct ib_ucm_apr_event_resp apr_resp; - - struct ib_ucm_sidr_req_event_resp sidr_req_resp; - struct ib_ucm_sidr_rep_event_resp sidr_rep_resp; - - __u32 send_status; - } u; -}; - -#endif /* IB_USER_CM_H */ Property changes on: projects/bsd_rdma_4_9/sys/ofed/include/rdma/ib_user_cm.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_cm.h =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_cm.h (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_cm.h (revision 320592) @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef IB_USER_CM_H +#define IB_USER_CM_H + +#ifdef _KERNEL +#include +#else +#include +#endif +#include + +#define IB_USER_CM_ABI_VERSION 5 + +enum { + IB_USER_CM_CMD_CREATE_ID, + IB_USER_CM_CMD_DESTROY_ID, + IB_USER_CM_CMD_ATTR_ID, + + IB_USER_CM_CMD_LISTEN, + IB_USER_CM_CMD_NOTIFY, + + IB_USER_CM_CMD_SEND_REQ, + IB_USER_CM_CMD_SEND_REP, + IB_USER_CM_CMD_SEND_RTU, + IB_USER_CM_CMD_SEND_DREQ, + IB_USER_CM_CMD_SEND_DREP, + IB_USER_CM_CMD_SEND_REJ, + IB_USER_CM_CMD_SEND_MRA, + IB_USER_CM_CMD_SEND_LAP, + IB_USER_CM_CMD_SEND_APR, + IB_USER_CM_CMD_SEND_SIDR_REQ, + IB_USER_CM_CMD_SEND_SIDR_REP, + + IB_USER_CM_CMD_EVENT, + IB_USER_CM_CMD_INIT_QP_ATTR, +}; +/* + * command ABI structures. + */ +struct ib_ucm_cmd_hdr { + __u32 cmd; + __u16 in; + __u16 out; +}; + +struct ib_ucm_create_id { + __u64 uid; + __u64 response; +}; + +struct ib_ucm_create_id_resp { + __u32 id; +}; + +struct ib_ucm_destroy_id { + __u64 response; + __u32 id; + __u32 reserved; +}; + +struct ib_ucm_destroy_id_resp { + __u32 events_reported; +}; + +struct ib_ucm_attr_id { + __u64 response; + __u32 id; + __u32 reserved; +}; + +struct ib_ucm_attr_id_resp { + __be64 service_id; + __be64 service_mask; + __be32 local_id; + __be32 remote_id; +}; + +struct ib_ucm_init_qp_attr { + __u64 response; + __u32 id; + __u32 qp_state; +}; + +struct ib_ucm_listen { + __be64 service_id; + __be64 service_mask; + __u32 id; + __u32 reserved; +}; + +struct ib_ucm_notify { + __u32 id; + __u32 event; +}; + +struct ib_ucm_private_data { + __u64 data; + __u32 id; + __u8 len; + __u8 reserved[3]; +}; + +struct ib_ucm_req { + __u32 id; + __u32 qpn; + __u32 qp_type; + __u32 psn; + __be64 sid; + __u64 data; + __u64 primary_path; + __u64 alternate_path; + __u8 len; + __u8 peer_to_peer; + __u8 responder_resources; + __u8 initiator_depth; + __u8 remote_cm_response_timeout; + __u8 flow_control; + __u8 local_cm_response_timeout; + __u8 retry_count; + __u8 rnr_retry_count; + __u8 max_cm_retries; + __u8 srq; + __u8 reserved[5]; +}; + +struct ib_ucm_rep { + __u64 uid; + __u64 data; + __u32 id; + __u32 qpn; + __u32 psn; + __u8 len; + __u8 responder_resources; + __u8 initiator_depth; + __u8 target_ack_delay; + __u8 failover_accepted; + __u8 flow_control; + __u8 rnr_retry_count; + __u8 srq; + __u8 reserved[4]; +}; + +struct ib_ucm_info { + __u32 id; + __u32 status; + __u64 info; + __u64 data; + __u8 info_len; + __u8 data_len; + __u8 reserved[6]; +}; + +struct ib_ucm_mra { + __u64 data; + __u32 id; + __u8 len; + __u8 timeout; + __u8 reserved[2]; +}; + +struct ib_ucm_lap { + __u64 path; + __u64 data; + __u32 id; + __u8 len; + __u8 reserved[3]; +}; + +struct ib_ucm_sidr_req { + __u32 id; + __u32 timeout; + __be64 sid; + __u64 data; + __u64 path; + __u16 reserved_pkey; + __u8 len; + __u8 max_cm_retries; + __u8 reserved[4]; +}; + +struct ib_ucm_sidr_rep { + __u32 id; + __u32 qpn; + __u32 qkey; + __u32 status; + __u64 info; + __u64 data; + __u8 info_len; + __u8 data_len; + __u8 reserved[6]; +}; +/* + * event notification ABI structures. 
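[Illustration: pulling these event structures out of the kernel follows the same write()-based framing as the command structures earlier in this header; a hedged sketch, where the byte-count semantics of the header fields and the buffer sizes are assumptions. resp->present (the IB_UCM_PRES_* bits defined further down) reports which caller buffers were filled in.]

    /*
     * Sketch only: fetch one CM event into caller-supplied buffers.
     */
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <rdma/ib_user_cm.h>

    int get_cm_event(int fd, struct ib_ucm_event_resp *resp,
                     void *data, __u8 data_len, void *info, __u8 info_len)
    {
            struct {
                    struct ib_ucm_cmd_hdr hdr;
                    struct ib_ucm_event_get cmd;
            } req;

            memset(&req, 0, sizeof(req));
            req.hdr.cmd = IB_USER_CM_CMD_EVENT;
            req.hdr.in  = sizeof(req.cmd);      /* byte counts (assumed) */
            req.hdr.out = sizeof(*resp);
            req.cmd.response = (uintptr_t)resp;
            req.cmd.data     = (uintptr_t)data;
            req.cmd.info     = (uintptr_t)info;
            req.cmd.data_len = data_len;
            req.cmd.info_len = info_len;

            return write(fd, &req, sizeof(req)) == (ssize_t)sizeof(req) ? 0 : -1;
    }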
+ */ +struct ib_ucm_event_get { + __u64 response; + __u64 data; + __u64 info; + __u8 data_len; + __u8 info_len; + __u8 reserved[6]; +}; + +struct ib_ucm_req_event_resp { + struct ib_user_path_rec primary_path; + struct ib_user_path_rec alternate_path; + __be64 remote_ca_guid; + __u32 remote_qkey; + __u32 remote_qpn; + __u32 qp_type; + __u32 starting_psn; + __u8 responder_resources; + __u8 initiator_depth; + __u8 local_cm_response_timeout; + __u8 flow_control; + __u8 remote_cm_response_timeout; + __u8 retry_count; + __u8 rnr_retry_count; + __u8 srq; + __u8 port; + __u8 reserved[7]; +}; + +struct ib_ucm_rep_event_resp { + __be64 remote_ca_guid; + __u32 remote_qkey; + __u32 remote_qpn; + __u32 starting_psn; + __u8 responder_resources; + __u8 initiator_depth; + __u8 target_ack_delay; + __u8 failover_accepted; + __u8 flow_control; + __u8 rnr_retry_count; + __u8 srq; + __u8 reserved[5]; +}; + +struct ib_ucm_rej_event_resp { + __u32 reason; + /* ari in ib_ucm_event_get info field. */ +}; + +struct ib_ucm_mra_event_resp { + __u8 timeout; + __u8 reserved[3]; +}; + +struct ib_ucm_lap_event_resp { + struct ib_user_path_rec path; +}; + +struct ib_ucm_apr_event_resp { + __u32 status; + /* apr info in ib_ucm_event_get info field. */ +}; + +struct ib_ucm_sidr_req_event_resp { + __u16 pkey; + __u8 port; + __u8 reserved; +}; + +struct ib_ucm_sidr_rep_event_resp { + __u32 status; + __u32 qkey; + __u32 qpn; + /* info in ib_ucm_event_get info field. */ +}; + +#define IB_UCM_PRES_DATA 0x01 +#define IB_UCM_PRES_INFO 0x02 +#define IB_UCM_PRES_PRIMARY 0x04 +#define IB_UCM_PRES_ALTERNATE 0x08 + +struct ib_ucm_event_resp { + __u64 uid; + __u32 id; + __u32 event; + __u32 present; + __u32 reserved; + union { + struct ib_ucm_req_event_resp req_resp; + struct ib_ucm_rep_event_resp rep_resp; + struct ib_ucm_rej_event_resp rej_resp; + struct ib_ucm_mra_event_resp mra_resp; + struct ib_ucm_lap_event_resp lap_resp; + struct ib_ucm_apr_event_resp apr_resp; + + struct ib_ucm_sidr_req_event_resp sidr_req_resp; + struct ib_ucm_sidr_rep_event_resp sidr_rep_resp; + + __u32 send_status; + } u; +}; + +#endif /* IB_USER_CM_H */ Property changes on: projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_cm.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_mad.h =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_mad.h (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_mad.h (revision 320592) @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_USER_MAD_H +#define IB_USER_MAD_H + +#ifdef _KERNEL +#include +#include +#else +#include +#include +#endif + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define IB_USER_MAD_ABI_VERSION 5 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + */ + +/** + * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index + * @id - ID of agent MAD received with/to be sent with + * @status - 0 on successful receive, ETIMEDOUT if no response + * received (transaction ID in data[] will be set to TID of original + * request) (ignored on send) + * @timeout_ms - Milliseconds to wait for response (unset on receive) + * @retries - Number of automatic retries to attempt + * @qpn - Remote QP number received from/to be sent to + * @qkey - Remote Q_Key to be sent with (unset on receive) + * @lid - Remote lid received from/to be sent to + * @sl - Service level received with/to be sent with + * @path_bits - Local path bits received with/to be sent with + * @grh_present - If set, GRH was received/should be sent + * @gid_index - Local GID index to send with (unset on receive) + * @hop_limit - Hop limit in GRH + * @traffic_class - Traffic class in GRH + * @gid - Remote GID in GRH + * @flow_label - Flow label in GRH + */ +struct ib_user_mad_hdr_old { + __u32 id; + __u32 status; + __u32 timeout_ms; + __u32 retries; + __u32 length; + __be32 qpn; + __be32 qkey; + __be16 lid; + __u8 sl; + __u8 path_bits; + __u8 grh_present; + __u8 gid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 gid[16]; + __be32 flow_label; +}; + +/** + * ib_user_mad_hdr - MAD packet header + * This layout allows specifying/receiving the P_Key index. To use + * this capability, an application must call the + * IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before + * any other actions with the file handle. 
+ * @id - ID of agent MAD received with/to be sent with + * @status - 0 on successful receive, ETIMEDOUT if no response + * received (transaction ID in data[] will be set to TID of original + * request) (ignored on send) + * @timeout_ms - Milliseconds to wait for response (unset on receive) + * @retries - Number of automatic retries to attempt + * @qpn - Remote QP number received from/to be sent to + * @qkey - Remote Q_Key to be sent with (unset on receive) + * @lid - Remote lid received from/to be sent to + * @sl - Service level received with/to be sent with + * @path_bits - Local path bits received with/to be sent with + * @grh_present - If set, GRH was received/should be sent + * @gid_index - Local GID index to send with (unset on receive) + * @hop_limit - Hop limit in GRH + * @traffic_class - Traffic class in GRH + * @gid - Remote GID in GRH + * @flow_label - Flow label in GRH + * @pkey_index - P_Key index + */ +struct ib_user_mad_hdr { + __u32 id; + __u32 status; + __u32 timeout_ms; + __u32 retries; + __u32 length; + __be32 qpn; + __be32 qkey; + __be16 lid; + __u8 sl; + __u8 path_bits; + __u8 grh_present; + __u8 gid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 gid[16]; + __be32 flow_label; + __u16 pkey_index; + __u8 reserved[6]; +}; + +/** + * ib_user_mad - MAD packet + * @hdr - MAD packet header + * @data - Contents of MAD + * + */ +struct ib_user_mad { + struct ib_user_mad_hdr hdr; + __u64 data[0]; +}; + +/* + * Earlier versions of this interface definition declared the + * method_mask[] member as an array of __u32 but treated it as a + * bitmap made up of longs in the kernel. This ambiguity meant that + * 32-bit big-endian applications that can run on both 32-bit and + * 64-bit kernels had no consistent ABI to rely on, and 64-bit + * big-endian applications that treated method_mask as being made up + * of 32-bit words would have their bitmap misinterpreted. + * + * To clear up this confusion, we change the declaration of + * method_mask[] to use unsigned long and handle the conversion from + * 32-bit userspace to 64-bit kernel for big-endian systems in the + * compat_ioctl method. Unfortunately, to keep the structure layout + * the same, we need the method_mask[] array to be aligned only to 4 + * bytes even when long is 64 bits, which forces us into this ugly + * typedef. + */ +typedef unsigned long __attribute__((aligned(4))) packed_ulong; +#define IB_USER_MAD_LONGS_PER_METHOD_MASK (128 / (8 * sizeof (long))) + +/** + * ib_user_mad_reg_req - MAD registration request + * @id - Set by the kernel; used to identify agent in future requests. + * @qpn - Queue pair number; must be 0 or 1. + * @method_mask - The caller will receive unsolicited MADs for any method + * where @method_mask = 1. + * @mgmt_class - Indicates which management class of MADs should be receive + * by the caller. This field is only required if the user wishes to + * receive unsolicited MADs, otherwise it should be 0. + * @mgmt_class_version - Indicates which version of MADs for the given + * management class to receive. + * @oui: Indicates IEEE OUI when mgmt_class is a vendor class + * in the range from 0x30 to 0x4f. Otherwise not used. + * @rmpp_version: If set, indicates the RMPP version used. 
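[Illustration: the v2 registration form declared a little further down adds a flags word; a companion sketch requesting user-space RMPP handling through it. The fd is assumed to be an already-open umad device, and the class and method values are illustrative.]

    /* Sketch: v2 registration asking for user-space RMPP handling. */
    #include <string.h>
    #include <sys/ioctl.h>
    #include <rdma/ib_user_mad.h>

    int register_agent2(int fd)
    {
            struct ib_user_mad_reg_req2 req;

            memset(&req, 0, sizeof(req));
            req.qpn = 1;                        /* must be 0 or 1 */
            req.mgmt_class = 0x03;              /* assumed class */
            req.mgmt_class_version = 2;
            req.flags = IB_USER_MAD_USER_RMPP;  /* within REG_FLAGS_CAP */
            req.method_mask[0] = 1ULL << 0x12;  /* one assumed method bit */
            if (ioctl(fd, IB_USER_MAD_REGISTER_AGENT2, &req) < 0)
                    return -1;
            return (int)req.id;                 /* kernel-assigned agent id */
    }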
+ *
+ */
+struct ib_user_mad_reg_req {
+ __u32 id;
+ packed_ulong method_mask[IB_USER_MAD_LONGS_PER_METHOD_MASK];
+ __u8 qpn;
+ __u8 mgmt_class;
+ __u8 mgmt_class_version;
+ __u8 oui[3];
+ __u8 rmpp_version;
+};
+
+/**
+ * ib_user_mad_reg_req2 - MAD registration request
+ *
+ * @id - Set by the _kernel_; used by userspace to identify the
+ * registered agent in future requests.
+ * @qpn - Queue pair number; must be 0 or 1.
+ * @mgmt_class - Indicates which management class of MADs should be
+ * received by the caller. This field is only required if
+ * the user wishes to receive unsolicited MADs, otherwise
+ * it should be 0.
+ * @mgmt_class_version - Indicates which version of MADs for the given
+ * management class to receive.
+ * @res - Ignored.
+ * @flags - Additional registration flags; must be in the set of
+ * flags defined in IB_USER_MAD_REG_FLAGS_CAP.
+ * @method_mask - The caller wishes to receive unsolicited MADs for the
+ * methods whose bits are set.
+ * @oui - Indicates IEEE OUI to use when mgmt_class is a vendor
+ * class in the range from 0x30 to 0x4f. Otherwise not
+ * used.
+ * @rmpp_version - If set, indicates the RMPP version to use.
+ */
+enum {
+ IB_USER_MAD_USER_RMPP = (1 << 0),
+};
+#define IB_USER_MAD_REG_FLAGS_CAP (IB_USER_MAD_USER_RMPP)
+struct ib_user_mad_reg_req2 {
+ __u32 id;
+ __u32 qpn;
+ __u8 mgmt_class;
+ __u8 mgmt_class_version;
+ __u16 res;
+ __u32 flags;
+ __u64 method_mask[2];
+ __u32 oui;
+ __u8 rmpp_version;
+ __u8 reserved[3];
+};
+
+#define IB_IOCTL_MAGIC 0x1b
+
+#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \
+ struct ib_user_mad_reg_req)
+
+#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32)
+
+#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3)
+
+#define IB_USER_MAD_REGISTER_AGENT2 _IOWR(IB_IOCTL_MAGIC, 4, \
+ struct ib_user_mad_reg_req2)
+
+#endif /* IB_USER_MAD_H */

Property changes on: projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_mad.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_sa.h
===================================================================
--- projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_sa.h (nonexistent)
+++ projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_sa.h (revision 320592)
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_USER_SA_H +#define IB_USER_SA_H + +#ifdef _KERNEL +#include +#else +#include +#endif + +enum { + IB_PATH_GMP = 1, + IB_PATH_PRIMARY = (1<<1), + IB_PATH_ALTERNATE = (1<<2), + IB_PATH_OUTBOUND = (1<<3), + IB_PATH_INBOUND = (1<<4), + IB_PATH_INBOUND_REVERSE = (1<<5), + IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE +}; + +struct ib_path_rec_data { + __u32 flags; + __u32 reserved; + __u32 path_rec[16]; +}; + +struct ib_user_path_rec { + __u8 dgid[16]; + __u8 sgid[16]; + __be16 dlid; + __be16 slid; + __u32 raw_traffic; + __be32 flow_label; + __u32 reversible; + __u32 mtu; + __be16 pkey; + __u8 hop_limit; + __u8 traffic_class; + __u8 numb_path; + __u8 sl; + __u8 mtu_selector; + __u8 rate_selector; + __u8 rate; + __u8 packet_life_time_selector; + __u8 packet_life_time; + __u8 preference; +}; + +#endif /* IB_USER_SA_H */ Property changes on: projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_sa.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_verbs.h =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_verbs.h (nonexistent) +++ projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_verbs.h (revision 320592) @@ -0,0 +1,1071 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * Copyright (c) 2006 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_USER_VERBS_H
+#define IB_USER_VERBS_H
+
+#ifdef _KERNEL
+#include
+#else
+#include
+#endif
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define IB_USER_VERBS_ABI_VERSION 6
+#define IB_USER_VERBS_CMD_THRESHOLD 50
+
+enum {
+ IB_USER_VERBS_CMD_GET_CONTEXT,
+ IB_USER_VERBS_CMD_QUERY_DEVICE,
+ IB_USER_VERBS_CMD_QUERY_PORT,
+ IB_USER_VERBS_CMD_ALLOC_PD,
+ IB_USER_VERBS_CMD_DEALLOC_PD,
+ IB_USER_VERBS_CMD_CREATE_AH,
+ IB_USER_VERBS_CMD_MODIFY_AH,
+ IB_USER_VERBS_CMD_QUERY_AH,
+ IB_USER_VERBS_CMD_DESTROY_AH,
+ IB_USER_VERBS_CMD_REG_MR,
+ IB_USER_VERBS_CMD_REG_SMR,
+ IB_USER_VERBS_CMD_REREG_MR,
+ IB_USER_VERBS_CMD_QUERY_MR,
+ IB_USER_VERBS_CMD_DEREG_MR,
+ IB_USER_VERBS_CMD_ALLOC_MW,
+ IB_USER_VERBS_CMD_BIND_MW,
+ IB_USER_VERBS_CMD_DEALLOC_MW,
+ IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
+ IB_USER_VERBS_CMD_CREATE_CQ,
+ IB_USER_VERBS_CMD_RESIZE_CQ,
+ IB_USER_VERBS_CMD_DESTROY_CQ,
+ IB_USER_VERBS_CMD_POLL_CQ,
+ IB_USER_VERBS_CMD_PEEK_CQ,
+ IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
+ IB_USER_VERBS_CMD_CREATE_QP,
+ IB_USER_VERBS_CMD_QUERY_QP,
+ IB_USER_VERBS_CMD_MODIFY_QP,
+ IB_USER_VERBS_CMD_DESTROY_QP,
+ IB_USER_VERBS_CMD_POST_SEND,
+ IB_USER_VERBS_CMD_POST_RECV,
+ IB_USER_VERBS_CMD_ATTACH_MCAST,
+ IB_USER_VERBS_CMD_DETACH_MCAST,
+ IB_USER_VERBS_CMD_CREATE_SRQ,
+ IB_USER_VERBS_CMD_MODIFY_SRQ,
+ IB_USER_VERBS_CMD_QUERY_SRQ,
+ IB_USER_VERBS_CMD_DESTROY_SRQ,
+ IB_USER_VERBS_CMD_POST_SRQ_RECV,
+ IB_USER_VERBS_CMD_OPEN_XRCD,
+ IB_USER_VERBS_CMD_CLOSE_XRCD,
+ IB_USER_VERBS_CMD_CREATE_XSRQ,
+ IB_USER_VERBS_CMD_OPEN_QP,
+};
+
+enum {
+ IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
+ IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ,
+ IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP,
+ IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
+ IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
+ IB_USER_VERBS_EX_CMD_CREATE_WQ,
+ IB_USER_VERBS_EX_CMD_MODIFY_WQ,
+ IB_USER_VERBS_EX_CMD_DESTROY_WQ,
+ IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL,
+ IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL
+};
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * Specifically:
+ * - Do not use pointer types -- pass pointers in __u64 instead.
+ * - Make sure that any structure larger than 4 bytes is padded to a
+ * multiple of 8 bytes. Otherwise the structure size will be
+ * different between 32-bit and 64-bit architectures.
+ */
+
+struct ib_uverbs_async_event_desc {
+ __u64 element;
+ __u32 event_type; /* enum ib_event_type */
+ __u32 reserved;
+};
+
+struct ib_uverbs_comp_event_desc {
+ __u64 cq_handle;
+};
+
+/*
+ * All commands from userspace should start with a __u32 command field
+ * followed by __u16 in_words and out_words fields (which give the
+ * length of the command block and response buffer, if any, in 32-bit
+ * words). The kernel driver will read these fields first and read
+ * the rest of the command struct based on these values.
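+ *
+ * An editorial sketch of one such command, mirroring the layout that
+ * libibverbs-style callers use (uverbs_fd is illustrative and error
+ * handling is omitted):
+ *
+ *	struct {
+ *		struct ib_uverbs_cmd_hdr hdr;
+ *		struct ib_uverbs_alloc_pd cmd;
+ *	} req;
+ *	struct ib_uverbs_alloc_pd_resp resp;
+ *
+ *	req.hdr.command = IB_USER_VERBS_CMD_ALLOC_PD;
+ *	req.hdr.in_words = sizeof(req) / 4;
+ *	req.hdr.out_words = sizeof(resp) / 4;
+ *	req.cmd.response = (uintptr_t)&resp;
+ *	write(uverbs_fd, &req, sizeof(req));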
+ */ + +#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff +#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u +#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24 + +#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80 + +struct ib_uverbs_cmd_hdr { + __u32 command; + __u16 in_words; + __u16 out_words; +}; + +struct ib_uverbs_ex_cmd_hdr { + __u64 response; + __u16 provider_in_words; + __u16 provider_out_words; + __u32 cmd_hdr_reserved; +}; + +struct ib_uverbs_get_context { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_get_context_resp { + __u32 async_fd; + __u32 num_comp_vectors; +}; + +struct ib_uverbs_query_device { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_device_resp { + __u64 fw_ver; + __be64 node_guid; + __be64 sys_image_guid; + __u64 max_mr_size; + __u64 page_size_cap; + __u32 vendor_id; + __u32 vendor_part_id; + __u32 hw_ver; + __u32 max_qp; + __u32 max_qp_wr; + __u32 device_cap_flags; + __u32 max_sge; + __u32 max_sge_rd; + __u32 max_cq; + __u32 max_cqe; + __u32 max_mr; + __u32 max_pd; + __u32 max_qp_rd_atom; + __u32 max_ee_rd_atom; + __u32 max_res_rd_atom; + __u32 max_qp_init_rd_atom; + __u32 max_ee_init_rd_atom; + __u32 atomic_cap; + __u32 max_ee; + __u32 max_rdd; + __u32 max_mw; + __u32 max_raw_ipv6_qp; + __u32 max_raw_ethy_qp; + __u32 max_mcast_grp; + __u32 max_mcast_qp_attach; + __u32 max_total_mcast_qp_attach; + __u32 max_ah; + __u32 max_fmr; + __u32 max_map_per_fmr; + __u32 max_srq; + __u32 max_srq_wr; + __u32 max_srq_sge; + __u16 max_pkeys; + __u8 local_ca_ack_delay; + __u8 phys_port_cnt; + __u8 reserved[4]; +}; + +struct ib_uverbs_ex_query_device { + __u32 comp_mask; + __u32 reserved; +}; + +struct ib_uverbs_odp_caps { + __u64 general_caps; + struct { + __u32 rc_odp_caps; + __u32 uc_odp_caps; + __u32 ud_odp_caps; + } per_transport_caps; + __u32 reserved; +}; + +struct ib_uverbs_rss_caps { + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. 
+ * supported_qpts |= 1 << IB_QPT_UD
+ */
+ __u32 supported_qpts;
+ __u32 max_rwq_indirection_tables;
+ __u32 max_rwq_indirection_table_size;
+ __u32 reserved;
+};
+
+struct ib_uverbs_ex_query_device_resp {
+ struct ib_uverbs_query_device_resp base;
+ __u32 comp_mask;
+ __u32 response_length;
+ struct ib_uverbs_odp_caps odp_caps;
+ __u64 timestamp_mask;
+ __u64 hca_core_clock; /* in kHz */
+ __u64 device_cap_flags_ex;
+ struct ib_uverbs_rss_caps rss_caps;
+ __u32 max_wq_type_rq;
+ __u32 reserved;
+};
+
+struct ib_uverbs_query_port {
+ __u64 response;
+ __u8 port_num;
+ __u8 reserved[7];
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_port_resp {
+ __u32 port_cap_flags;
+ __u32 max_msg_sz;
+ __u32 bad_pkey_cntr;
+ __u32 qkey_viol_cntr;
+ __u32 gid_tbl_len;
+ __u16 pkey_tbl_len;
+ __u16 lid;
+ __u16 sm_lid;
+ __u8 state;
+ __u8 max_mtu;
+ __u8 active_mtu;
+ __u8 lmc;
+ __u8 max_vl_num;
+ __u8 sm_sl;
+ __u8 subnet_timeout;
+ __u8 init_type_reply;
+ __u8 active_width;
+ __u8 active_speed;
+ __u8 phys_state;
+ __u8 link_layer;
+ __u8 reserved[2];
+};
+
+struct ib_uverbs_alloc_pd {
+ __u64 response;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_alloc_pd_resp {
+ __u32 pd_handle;
+};
+
+struct ib_uverbs_dealloc_pd {
+ __u32 pd_handle;
+};
+
+struct ib_uverbs_open_xrcd {
+ __u64 response;
+ __u32 fd;
+ __u32 oflags;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_open_xrcd_resp {
+ __u32 xrcd_handle;
+};
+
+struct ib_uverbs_close_xrcd {
+ __u32 xrcd_handle;
+};
+
+struct ib_uverbs_reg_mr {
+ __u64 response;
+ __u64 start;
+ __u64 length;
+ __u64 hca_va;
+ __u32 pd_handle;
+ __u32 access_flags;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_reg_mr_resp {
+ __u32 mr_handle;
+ __u32 lkey;
+ __u32 rkey;
+};
+
+struct ib_uverbs_rereg_mr {
+ __u64 response;
+ __u32 mr_handle;
+ __u32 flags;
+ __u64 start;
+ __u64 length;
+ __u64 hca_va;
+ __u32 pd_handle;
+ __u32 access_flags;
+};
+
+struct ib_uverbs_rereg_mr_resp {
+ __u32 lkey;
+ __u32 rkey;
+};
+
+struct ib_uverbs_dereg_mr {
+ __u32 mr_handle;
+};
+
+struct ib_uverbs_alloc_mw {
+ __u64 response;
+ __u32 pd_handle;
+ __u8 mw_type;
+ __u8 reserved[3];
+};
+
+struct ib_uverbs_alloc_mw_resp {
+ __u32 mw_handle;
+ __u32 rkey;
+};
+
+struct ib_uverbs_dealloc_mw {
+ __u32 mw_handle;
+};
+
+struct ib_uverbs_create_comp_channel {
+ __u64 response;
+};
+
+struct ib_uverbs_create_comp_channel_resp {
+ __u32 fd;
+};
+
+struct ib_uverbs_create_cq {
+ __u64 response;
+ __u64 user_handle;
+ __u32 cqe;
+ __u32 comp_vector;
+ __s32 comp_channel;
+ __u32 reserved;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_ex_create_cq {
+ __u64 user_handle;
+ __u32 cqe;
+ __u32 comp_vector;
+ __s32 comp_channel;
+ __u32 comp_mask;
+ __u32 flags;
+ __u32 reserved;
+};
+
+struct ib_uverbs_create_cq_resp {
+ __u32 cq_handle;
+ __u32 cqe;
+};
+
+struct ib_uverbs_ex_create_cq_resp {
+ struct ib_uverbs_create_cq_resp base;
+ __u32 comp_mask;
+ __u32 response_length;
+};
+
+struct ib_uverbs_resize_cq {
+ __u64 response;
+ __u32 cq_handle;
+ __u32 cqe;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_resize_cq_resp {
+ __u32 cqe;
+ __u32 reserved;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_poll_cq {
+ __u64 response;
+ __u32 cq_handle;
+ __u32 ne;
+};
+
+struct ib_uverbs_wc {
+ __u64 wr_id;
+ __u32 status;
+ __u32 opcode;
+ __u32 vendor_err;
+ __u32 byte_len;
+ union {
+ __u32 imm_data;
+ __u32 invalidate_rkey;
+ } ex;
+ __u32 qp_num;
+ __u32 src_qp;
+ __u32 wc_flags;
+ __u16 pkey_index;
+ __u16 slid;
+ __u8 sl;
+ __u8 dlid_path_bits;
+ __u8 port_num;
+ __u8 reserved;
+};
+
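+/*
+ * Editorial note: the poll_cq response below returns a variable
+ * number of completions, so a caller sizes its response buffer for
+ * the most completions it asked for in the command's ne field (N
+ * here is illustrative):
+ *
+ *	char buf[sizeof(struct ib_uverbs_poll_cq_resp) +
+ *		 N * sizeof(struct ib_uverbs_wc)];
+ */
+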
+struct ib_uverbs_poll_cq_resp { + __u32 count; + __u32 reserved; + struct ib_uverbs_wc wc[0]; +}; + +struct ib_uverbs_req_notify_cq { + __u32 cq_handle; + __u32 solicited_only; +}; + +struct ib_uverbs_destroy_cq { + __u64 response; + __u32 cq_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_cq_resp { + __u32 comp_events_reported; + __u32 async_events_reported; +}; + +struct ib_uverbs_global_route { + __u8 dgid[16]; + __u32 flow_label; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 reserved; +}; + +struct ib_uverbs_ah_attr { + struct ib_uverbs_global_route grh; + __u16 dlid; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_qp_attr { + __u32 qp_attr_mask; + __u32 qp_state; + __u32 cur_qp_state; + __u32 path_mtu; + __u32 path_mig_state; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + + struct ib_uverbs_ah_attr ah_attr; + struct ib_uverbs_ah_attr alt_ah_attr; + + /* ib_qp_cap */ + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 en_sqd_async_notify; + __u8 sq_draining; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[5]; +}; + +struct ib_uverbs_create_qp { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 send_cq_handle; + __u32 recv_cq_handle; + __u32 srq_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u8 is_srq; + __u8 reserved; + __u64 driver_data[0]; +}; + +enum ib_uverbs_create_qp_mask { + IB_UVERBS_CREATE_QP_MASK_IND_TABLE = 1UL << 0, +}; + +enum { + IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, +}; + +struct ib_uverbs_ex_create_qp { + __u64 user_handle; + __u32 pd_handle; + __u32 send_cq_handle; + __u32 recv_cq_handle; + __u32 srq_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u8 is_srq; + __u8 reserved; + __u32 comp_mask; + __u32 create_flags; + __u32 rwq_ind_tbl_handle; + __u32 reserved1; +}; + +struct ib_uverbs_open_qp { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 qpn; + __u8 qp_type; + __u8 reserved[7]; + __u64 driver_data[0]; +}; + +/* also used for open response */ +struct ib_uverbs_create_qp_resp { + __u32 qp_handle; + __u32 qpn; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u32 reserved; +}; + +struct ib_uverbs_ex_create_qp_resp { + struct ib_uverbs_create_qp_resp base; + __u32 comp_mask; + __u32 response_length; +}; + +/* + * This struct needs to remain a multiple of 8 bytes to keep the + * alignment of the modify QP parameters. 
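+ * (As laid out below: the 16-byte dgid, one __u32, two __u16s and
+ * eight __u8 fields come to 32 bytes, so the constraint holds.)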
+ */ +struct ib_uverbs_qp_dest { + __u8 dgid[16]; + __u32 flow_label; + __u16 dlid; + __u16 reserved; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; +}; + +struct ib_uverbs_query_qp { + __u64 response; + __u32 qp_handle; + __u32 attr_mask; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_qp_resp { + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 sq_draining; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 sq_sig_all; + __u8 reserved[5]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_modify_qp { + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 qp_handle; + __u32 attr_mask; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[2]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_modify_qp_resp { +}; + +struct ib_uverbs_destroy_qp { + __u64 response; + __u32 qp_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_qp_resp { + __u32 events_reported; +}; + +/* + * The ib_uverbs_sge structure isn't used anywhere, since we assume + * the ib_sge structure is packed the same way on 32-bit and 64-bit + * architectures in both kernel and user space. It's just here to + * document the ABI. 
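+ *
+ * Editorial note on the post_send/post_recv commands defined below:
+ * the write payload is expected to carry wr_count work requests of
+ * wqe_size bytes each, followed by the SGE entries for all of those
+ * requests packed back to back; wqe_size is what lets the kernel
+ * step over any provider-specific tail on each request.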
+ */ +struct ib_uverbs_sge { + __u64 addr; + __u32 length; + __u32 lkey; +}; + +struct ib_uverbs_send_wr { + __u64 wr_id; + __u32 num_sge; + __u32 opcode; + __u32 send_flags; + union { + __u32 imm_data; + __u32 invalidate_rkey; + } ex; + union { + struct { + __u64 remote_addr; + __u32 rkey; + __u32 reserved; + } rdma; + struct { + __u64 remote_addr; + __u64 compare_add; + __u64 swap; + __u32 rkey; + __u32 reserved; + } atomic; + struct { + __u32 ah; + __u32 remote_qpn; + __u32 remote_qkey; + __u32 reserved; + } ud; + } wr; +}; + +struct ib_uverbs_post_send { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_send_wr send_wr[0]; +}; + +struct ib_uverbs_post_send_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_recv_wr { + __u64 wr_id; + __u32 num_sge; + __u32 reserved; +}; + +struct ib_uverbs_post_recv { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv_wr[0]; +}; + +struct ib_uverbs_post_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_post_srq_recv { + __u64 response; + __u32 srq_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv[0]; +}; + +struct ib_uverbs_post_srq_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_create_ah { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 reserved; + struct ib_uverbs_ah_attr attr; +}; + +struct ib_uverbs_create_ah_resp { + __u32 ah_handle; +}; + +struct ib_uverbs_destroy_ah { + __u32 ah_handle; +}; + +struct ib_uverbs_attach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_detach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_flow_spec_hdr { + __u32 type; + __u16 size; + __u16 reserved; + /* followed by flow_spec */ + __u64 flow_spec_data[0]; +}; + +struct ib_uverbs_flow_eth_filter { + __u8 dst_mac[6]; + __u8 src_mac[6]; + __be16 ether_type; + __be16 vlan_tag; +}; + +struct ib_uverbs_flow_spec_eth { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_uverbs_flow_eth_filter val; + struct ib_uverbs_flow_eth_filter mask; +}; + +struct ib_uverbs_flow_ipv4_filter { + __be32 src_ip; + __be32 dst_ip; + __u8 proto; + __u8 tos; + __u8 ttl; + __u8 flags; +}; + +struct ib_uverbs_flow_spec_ipv4 { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_uverbs_flow_ipv4_filter val; + struct ib_uverbs_flow_ipv4_filter mask; +}; + +struct ib_uverbs_flow_tcp_udp_filter { + __be16 dst_port; + __be16 src_port; +}; + +struct ib_uverbs_flow_spec_tcp_udp { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_uverbs_flow_tcp_udp_filter val; + struct ib_uverbs_flow_tcp_udp_filter mask; +}; + +struct ib_uverbs_flow_ipv6_filter { + __u8 src_ip[16]; + __u8 dst_ip[16]; + __be32 flow_label; + __u8 next_hdr; + __u8 traffic_class; + __u8 hop_limit; + __u8 reserved; +}; + +struct ib_uverbs_flow_spec_ipv6 { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_uverbs_flow_ipv6_filter val; + struct ib_uverbs_flow_ipv6_filter mask; +}; + +struct ib_uverbs_flow_attr { + __u32 type; + __u16 size; + __u16 priority; + __u8 num_of_specs; + __u8 reserved[2]; + 
__u8 port;
+ __u32 flags;
+ /* Following are the optional layers according to user request
+ * struct ib_flow_spec_xxx
+ * struct ib_flow_spec_yyy
+ */
+ struct ib_uverbs_flow_spec_hdr flow_specs[0];
+};
+
+struct ib_uverbs_create_flow {
+ __u32 comp_mask;
+ __u32 qp_handle;
+ struct ib_uverbs_flow_attr flow_attr;
+};
+
+struct ib_uverbs_create_flow_resp {
+ __u32 comp_mask;
+ __u32 flow_handle;
+};
+
+struct ib_uverbs_destroy_flow {
+ __u32 comp_mask;
+ __u32 flow_handle;
+};
+
+struct ib_uverbs_create_srq {
+ __u64 response;
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 max_wr;
+ __u32 max_sge;
+ __u32 srq_limit;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_create_xsrq {
+ __u64 response;
+ __u64 user_handle;
+ __u32 srq_type;
+ __u32 pd_handle;
+ __u32 max_wr;
+ __u32 max_sge;
+ __u32 srq_limit;
+ __u32 reserved;
+ __u32 xrcd_handle;
+ __u32 cq_handle;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_create_srq_resp {
+ __u32 srq_handle;
+ __u32 max_wr;
+ __u32 max_sge;
+ __u32 srqn;
+};
+
+struct ib_uverbs_modify_srq {
+ __u32 srq_handle;
+ __u32 attr_mask;
+ __u32 max_wr;
+ __u32 srq_limit;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_srq {
+ __u64 response;
+ __u32 srq_handle;
+ __u32 reserved;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_srq_resp {
+ __u32 max_wr;
+ __u32 max_sge;
+ __u32 srq_limit;
+ __u32 reserved;
+};
+
+struct ib_uverbs_destroy_srq {
+ __u64 response;
+ __u32 srq_handle;
+ __u32 reserved;
+};
+
+struct ib_uverbs_destroy_srq_resp {
+ __u32 events_reported;
+};
+
+struct ib_uverbs_ex_create_wq {
+ __u32 comp_mask;
+ __u32 wq_type;
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 cq_handle;
+ __u32 max_wr;
+ __u32 max_sge;
+};
+
+struct ib_uverbs_ex_create_wq_resp {
+ __u32 comp_mask;
+ __u32 response_length;
+ __u32 wq_handle;
+ __u32 max_wr;
+ __u32 max_sge;
+ __u32 wqn;
+};
+
+struct ib_uverbs_ex_destroy_wq {
+ __u32 comp_mask;
+ __u32 wq_handle;
+};
+
+struct ib_uverbs_ex_destroy_wq_resp {
+ __u32 comp_mask;
+ __u32 response_length;
+ __u32 events_reported;
+ __u32 reserved;
+};
+
+struct ib_uverbs_ex_modify_wq {
+ __u32 attr_mask;
+ __u32 wq_handle;
+ __u32 wq_state;
+ __u32 curr_wq_state;
+};
+
+/* Limit chosen to bound kernel memory allocation, not the maximum
+ * expected table size */
+#define IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE 0x0d
+struct ib_uverbs_ex_create_rwq_ind_table {
+ __u32 comp_mask;
+ __u32 log_ind_tbl_size;
+ /* Following are the wq handles according to log_ind_tbl_size
+ * wq_handle1
+ * wq_handle2
+ */
+ __u32 wq_handles[0];
+};
+
+struct ib_uverbs_ex_create_rwq_ind_table_resp {
+ __u32 comp_mask;
+ __u32 response_length;
+ __u32 ind_tbl_handle;
+ __u32 ind_tbl_num;
+};
+
+struct ib_uverbs_ex_destroy_rwq_ind_table {
+ __u32 comp_mask;
+ __u32 ind_tbl_handle;
+};
+
+#endif /* IB_USER_VERBS_H */

Property changes on: projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/ib_user_verbs.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/rdma_user_cm.h
===================================================================
--- projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/rdma_user_cm.h (nonexistent)
+++ projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/rdma_user_cm.h (revision 320592)
@@ -0,0 +1,316 @@
+/*
+ * Copyright (c) 2005-2006 Intel Corporation.
All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RDMA_USER_CM_H +#define RDMA_USER_CM_H + +#ifdef _KERNEL +#include +#include +#include +#else +#include +#include +#endif + +#include +#include + +#define RDMA_USER_CM_ABI_VERSION 4 + +#define RDMA_MAX_PRIVATE_DATA 256 + +enum { + RDMA_USER_CM_CMD_CREATE_ID, + RDMA_USER_CM_CMD_DESTROY_ID, + RDMA_USER_CM_CMD_BIND_IP, + RDMA_USER_CM_CMD_RESOLVE_IP, + RDMA_USER_CM_CMD_RESOLVE_ROUTE, + RDMA_USER_CM_CMD_QUERY_ROUTE, + RDMA_USER_CM_CMD_CONNECT, + RDMA_USER_CM_CMD_LISTEN, + RDMA_USER_CM_CMD_ACCEPT, + RDMA_USER_CM_CMD_REJECT, + RDMA_USER_CM_CMD_DISCONNECT, + RDMA_USER_CM_CMD_INIT_QP_ATTR, + RDMA_USER_CM_CMD_GET_EVENT, + RDMA_USER_CM_CMD_GET_OPTION, + RDMA_USER_CM_CMD_SET_OPTION, + RDMA_USER_CM_CMD_NOTIFY, + RDMA_USER_CM_CMD_JOIN_IP_MCAST, + RDMA_USER_CM_CMD_LEAVE_MCAST, + RDMA_USER_CM_CMD_MIGRATE_ID, + RDMA_USER_CM_CMD_QUERY, + RDMA_USER_CM_CMD_BIND, + RDMA_USER_CM_CMD_RESOLVE_ADDR, + RDMA_USER_CM_CMD_JOIN_MCAST +}; + +/* + * command ABI structures. 
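+ *
+ * Each write starts with rdma_ucm_cmd_hdr; in and out are byte
+ * counts for the command payload and the response buffer. An
+ * editorial sketch of CREATE_ID (ucma_fd is illustrative; uid, ps
+ * and qp_type still need to be filled in; error handling omitted):
+ *
+ *	struct {
+ *		struct rdma_ucm_cmd_hdr hdr;
+ *		struct rdma_ucm_create_id cmd;
+ *	} req;
+ *	struct rdma_ucm_create_id_resp resp;
+ *
+ *	req.hdr.cmd = RDMA_USER_CM_CMD_CREATE_ID;
+ *	req.hdr.in = sizeof(req.cmd);
+ *	req.hdr.out = sizeof(resp);
+ *	req.cmd.response = (uintptr_t)&resp;
+ *	write(ucma_fd, &req, sizeof(req));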
+ */
+struct rdma_ucm_cmd_hdr {
+ __u32 cmd;
+ __u16 in;
+ __u16 out;
+};
+
+struct rdma_ucm_create_id {
+ __u64 uid;
+ __u64 response;
+ __u16 ps;
+ __u8 qp_type;
+ __u8 reserved[5];
+};
+
+struct rdma_ucm_create_id_resp {
+ __u32 id;
+};
+
+struct rdma_ucm_destroy_id {
+ __u64 response;
+ __u32 id;
+ __u32 reserved;
+};
+
+struct rdma_ucm_destroy_id_resp {
+ __u32 events_reported;
+};
+
+struct rdma_ucm_bind_ip {
+ __u64 response;
+ struct sockaddr_in6 addr;
+ __u32 id;
+};
+
+struct rdma_ucm_bind {
+ __u32 id;
+ __u16 addr_size;
+ __u16 reserved;
+ struct sockaddr_storage addr;
+};
+
+struct rdma_ucm_resolve_ip {
+ struct sockaddr_in6 src_addr;
+ struct sockaddr_in6 dst_addr;
+ __u32 id;
+ __u32 timeout_ms;
+};
+
+struct rdma_ucm_resolve_addr {
+ __u32 id;
+ __u32 timeout_ms;
+ __u16 src_size;
+ __u16 dst_size;
+ __u32 reserved;
+ struct sockaddr_storage src_addr;
+ struct sockaddr_storage dst_addr;
+};
+
+struct rdma_ucm_resolve_route {
+ __u32 id;
+ __u32 timeout_ms;
+};
+
+enum {
+ RDMA_USER_CM_QUERY_ADDR,
+ RDMA_USER_CM_QUERY_PATH,
+ RDMA_USER_CM_QUERY_GID
+};
+
+struct rdma_ucm_query {
+ __u64 response;
+ __u32 id;
+ __u32 option;
+};
+
+struct rdma_ucm_query_route_resp {
+ __u64 node_guid;
+ struct ib_user_path_rec ib_route[2];
+ struct sockaddr_in6 src_addr;
+ struct sockaddr_in6 dst_addr;
+ __u32 num_paths;
+ __u8 port_num;
+ __u8 reserved[3];
+};
+
+struct rdma_ucm_query_addr_resp {
+ __u64 node_guid;
+ __u8 port_num;
+ __u8 reserved;
+ __u16 pkey;
+ __u16 src_size;
+ __u16 dst_size;
+ struct sockaddr_storage src_addr;
+ struct sockaddr_storage dst_addr;
+};
+
+struct rdma_ucm_query_path_resp {
+ __u32 num_paths;
+ __u32 reserved;
+ struct ib_path_rec_data path_data[0];
+};
+
+struct rdma_ucm_conn_param {
+ __u32 qp_num;
+ __u32 qkey;
+ __u8 private_data[RDMA_MAX_PRIVATE_DATA];
+ __u8 private_data_len;
+ __u8 srq;
+ __u8 responder_resources;
+ __u8 initiator_depth;
+ __u8 flow_control;
+ __u8 retry_count;
+ __u8 rnr_retry_count;
+ __u8 valid;
+};
+
+struct rdma_ucm_ud_param {
+ __u32 qp_num;
+ __u32 qkey;
+ struct ib_uverbs_ah_attr ah_attr;
+ __u8 private_data[RDMA_MAX_PRIVATE_DATA];
+ __u8 private_data_len;
+ __u8 reserved[7];
+};
+
+struct rdma_ucm_connect {
+ struct rdma_ucm_conn_param conn_param;
+ __u32 id;
+ __u32 reserved;
+};
+
+struct rdma_ucm_listen {
+ __u32 id;
+ __u32 backlog;
+};
+
+struct rdma_ucm_accept {
+ __u64 uid;
+ struct rdma_ucm_conn_param conn_param;
+ __u32 id;
+ __u32 reserved;
+};
+
+struct rdma_ucm_reject {
+ __u32 id;
+ __u8 private_data_len;
+ __u8 reserved[3];
+ __u8 private_data[RDMA_MAX_PRIVATE_DATA];
+};
+
+struct rdma_ucm_disconnect {
+ __u32 id;
+};
+
+struct rdma_ucm_init_qp_attr {
+ __u64 response;
+ __u32 id;
+ __u32 qp_state;
+};
+
+struct rdma_ucm_notify {
+ __u32 id;
+ __u32 event;
+};
+
+struct rdma_ucm_join_ip_mcast {
+ __u64 response; /* rdma_ucm_create_id_resp */
+ __u64 uid;
+ struct sockaddr_in6 addr;
+ __u32 id;
+};
+
+/* Multicast join flags */
+enum {
+ RDMA_MC_JOIN_FLAG_FULLMEMBER,
+ RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER,
+ RDMA_MC_JOIN_FLAG_RESERVED,
+};
+
+struct rdma_ucm_join_mcast {
+ __u64 response; /* rdma_ucm_create_id_resp */
+ __u64 uid;
+ __u32 id;
+ __u16 addr_size;
+ __u16 join_flags;
+ struct sockaddr_storage addr;
+};
+
+struct rdma_ucm_get_event {
+ __u64 response;
+};
+
+struct rdma_ucm_event_resp {
+ __u64 uid;
+ __u32 id;
+ __u32 event;
+ __u32 status;
+ union {
+ struct rdma_ucm_conn_param conn;
+ struct rdma_ucm_ud_param ud;
+ } param;
+};
+
+/* Option levels */
+enum {
+ RDMA_OPTION_ID = 0,
+
RDMA_OPTION_IB = 1 +}; + +/* Option details */ +enum { + RDMA_OPTION_ID_TOS = 0, + RDMA_OPTION_ID_REUSEADDR = 1, + RDMA_OPTION_ID_AFONLY = 2, + RDMA_OPTION_IB_PATH = 1 +}; + +struct rdma_ucm_set_option { + __u64 optval; + __u32 id; + __u32 level; + __u32 optname; + __u32 optlen; +}; + +struct rdma_ucm_migrate_id { + __u64 response; + __u32 id; + __u32 fd; +}; + +struct rdma_ucm_migrate_resp { + __u32 events_reported; +}; + +#endif /* RDMA_USER_CM_H */ Property changes on: projects/bsd_rdma_4_9/sys/ofed/include/uapi/rdma/rdma_user_cm.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property