D5791.id21885.diff

Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -4186,6 +4186,12 @@
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
ofed/drivers/infiniband/core/peer_mem.c optional ofed \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
+ofed/drivers/infiniband/core/roce_gid_mgmt.c optional ofed \
+ no-depend \
+ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
+ofed/drivers/infiniband/core/roce_gid_cache.c optional ofed \
+ no-depend \
+ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
ofed/drivers/infiniband/core/sa_query.c optional ofed \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
ofed/drivers/infiniband/core/smi.c optional ofed \
Index: sys/modules/ibcore/Makefile
===================================================================
--- sys/modules/ibcore/Makefile
+++ sys/modules/ibcore/Makefile
@@ -6,6 +6,7 @@
agent.c multicast.c smi.c ud_header.c uverbs_main.c \
mad.c peer_mem.c umem.c uverbs_marshall.c \
cache.c device.c packer.c sysfs.c user_mad.c verbs.c \
+ roce_gid_mgmt.c roce_gid_cache.c \
cm.c fmr_pool.c mad_rmpp.c ucm.c cma.c \
vnode_if.h device_if.h bus_if.h pci_if.h \
opt_inet.h opt_inet6.h
Index: sys/ofed/drivers/infiniband/core/addr.c
===================================================================
--- sys/ofed/drivers/infiniband/core/addr.c
+++ sys/ofed/drivers/infiniband/core/addr.c
@@ -42,13 +42,11 @@
#include <net/route.h>
#include <net/netevent.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib.h>
#include <netinet/if_ether.h>
#include <netinet6/scope6_var.h>
-
-MODULE_AUTHOR("Sean Hefty");
-MODULE_DESCRIPTION("IB Address Translation");
-MODULE_LICENSE("Dual BSD/GPL");
+/* IB Address Translation */
struct addr_req {
struct list_head list;
@@ -70,6 +68,21 @@
static struct delayed_work work;
static struct workqueue_struct *addr_wq;
+int rdma_addr_size(struct sockaddr *addr)
+{
+ switch (addr->sa_family) {
+ case AF_INET:
+ return sizeof(struct sockaddr_in);
+ case AF_INET6:
+ return sizeof(struct sockaddr_in6);
+ case AF_IB:
+ return sizeof(struct sockaddr_ib);
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL(rdma_addr_size);
+
static struct rdma_addr_client self;
void rdma_addr_register_client(struct rdma_addr_client *client)
{
@@ -92,7 +105,7 @@
EXPORT_SYMBOL(rdma_addr_unregister_client);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
- const unsigned char *dst_dev_addr)
+ const unsigned char *dst_dev_addr)
{
if (dev->if_type == IFT_INFINIBAND)
dev_addr->dev_type = ARPHRD_INFINIBAND;
@@ -110,75 +123,54 @@
}
EXPORT_SYMBOL(rdma_copy_addr);
-#define SCOPE_ID_CACHE(_scope_id, _addr6) do { \
- (_addr6)->sin6_addr.s6_addr[3] = (_scope_id); \
- (_addr6)->sin6_scope_id = 0; } while (0)
-
-#define SCOPE_ID_RESTORE(_scope_id, _addr6) do { \
- (_addr6)->sin6_scope_id = (_scope_id); \
- (_addr6)->sin6_addr.s6_addr[3] = 0; } while (0)
-
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
u16 *vlan_id)
{
- struct net_device *dev;
- int ret = -EADDRNOTAVAIL;
+ struct net_device *dev = NULL;
+ int ret;
- if (dev_addr->bound_dev_if) {
+ if (dev_addr->bound_dev_if > 0) {
dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
- if (!dev)
- return -ENODEV;
- ret = rdma_copy_addr(dev_addr, dev, NULL);
- dev_put(dev);
- return ret;
- }
-
- switch (addr->sa_family) {
- case AF_INET:
- dev = ip_dev_find(&init_net,
- ((struct sockaddr_in *) addr)->sin_addr.s_addr);
-
- if (!dev)
- return ret;
-
- ret = rdma_copy_addr(dev_addr, dev, NULL);
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
- dev_put(dev);
- break;
-
-#if defined(INET6)
- case AF_INET6:
- {
- struct sockaddr_in6 *sin6;
+ } else {
+ switch (addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ dev = ip_dev_find(&init_net,
+ ((struct sockaddr_in *) addr)->sin_addr.s_addr);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6: {
+ struct sockaddr_in6 sin6 = {
+ .sin6_len = sizeof(sin6),
+ .sin6_family = AF_INET6,
+ .sin6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr,
+ .sin6_scope_id = ((struct sockaddr_in6 *)addr)->sin6_scope_id,
+ };
struct ifaddr *ifa;
- in_port_t port;
- uint32_t scope_id;
- sin6 = (struct sockaddr_in6 *)addr;
- port = sin6->sin6_port;
- sin6->sin6_port = 0;
- scope_id = sin6->sin6_scope_id;
- if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
- SCOPE_ID_CACHE(scope_id, sin6);
- ifa = ifa_ifwithaddr(addr);
- sin6->sin6_port = port;
- if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
- SCOPE_ID_RESTORE(scope_id, sin6);
- if (ifa == NULL) {
- ret = -ENODEV;
+ sa6_embedscope(&sin6, 0);
+ ifa = ifa_ifwithaddr((const struct sockaddr *)&sin6);
+ if (ifa == NULL)
break;
- }
- ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(ifa->ifa_ifp);
+ dev = ifa->ifa_ifp;
+ if (dev != NULL)
+ if_ref(dev);
ifa_free(ifa);
break;
}
#endif
- default:
- break;
+ default:
+ break;
+ }
}
+ if (dev == NULL)
+ return -ENODEV;
+
+ ret = rdma_copy_addr(dev_addr, dev, NULL);
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
+ dev_put(dev);
return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);
@@ -223,9 +215,6 @@
#if defined(INET) || defined(INET6)
in_port_t port;
#endif
-#ifdef INET6
- uint32_t scope_id;
-#endif
u_char edst[MAX_ADDR_LEN];
int multi;
int bcast;
@@ -244,9 +233,9 @@
ifa = NULL;
ifp = NULL;
memset(edst, 0, sizeof(edst));
-#ifdef INET6
- scope_id = -1U;
-#endif
+
+ if (dst_in->sa_family != src_in->sa_family)
+ return -EINVAL;
switch (dst_in->sa_family) {
#ifdef INET
@@ -289,23 +278,13 @@
sin6 = (struct sockaddr_in6 *)dst_in;
if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
multi = 1;
- if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
- /*
- * The IB address comparison fails if the
- * scope ID is set and not part of the addr:
- */
- scope_id = sin6->sin6_scope_id;
- if (scope_id < 256)
- SCOPE_ID_CACHE(scope_id, sin6);
- }
+ sa6_embedscope(sin6, 0);
+
sin6 = (struct sockaddr_in6 *)src_in;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
port = sin6->sin6_port;
sin6->sin6_port = 0;
- if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
- if (scope_id < 256)
- SCOPE_ID_CACHE(scope_id, sin6);
- }
+ sa6_embedscope(sin6, 0);
/*
* If we have a source address to use look it
@@ -396,12 +375,16 @@
case AF_INET:
error = arpresolve(ifp, is_gw, NULL,
is_gw ? rte->rt_gateway : dst_in, edst, NULL, NULL);
+ if (error == 0 && is_gw != 0)
+ addr->network = RDMA_NETWORK_IPV4;
break;
#endif
#ifdef INET6
case AF_INET6:
error = nd6_resolve(ifp, is_gw, NULL,
is_gw ? rte->rt_gateway : dst_in, edst, NULL, NULL);
+ if (error == 0 && is_gw != 0)
+ addr->network = RDMA_NETWORK_IPV6;
break;
#endif
default:
@@ -414,18 +397,19 @@
if (error == 0)
error = -rdma_copy_addr(addr, ifp, edst);
if (error == 0)
- memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
+ memcpy(src_in, ifa->ifa_addr, rdma_addr_size(ifa->ifa_addr));
+ if (error == EWOULDBLOCK || error == EAGAIN)
+ error = ENODATA;
+ switch (src_in->sa_family) {
#ifdef INET6
- if (scope_id < 256) {
- sin6 = (struct sockaddr_in6 *)src_in;
- if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
- SCOPE_ID_RESTORE(scope_id, sin6);
- sin6 = (struct sockaddr_in6 *)dst_in;
- SCOPE_ID_RESTORE(scope_id, sin6);
- }
+ case AF_INET6:
+ sa6_recoverscope((struct sockaddr_in6 *)src_in);
+ sa6_recoverscope((struct sockaddr_in6 *)dst_in);
+ break;
#endif
- if (error == EWOULDBLOCK)
- error = ENODATA;
+ default:
+ break;
+ }
return -error;
}
@@ -460,7 +444,7 @@
list_for_each_entry_safe(req, temp_req, &done_list, list) {
list_del(&req->list);
req->callback(req->status, (struct sockaddr *) &req->src_addr,
- req->addr, req->context);
+ req->addr, req->context);
put_client(req->client);
kfree(req);
}
@@ -490,12 +474,12 @@
goto err;
}
- memcpy(src_in, src_addr, ip_addr_size(src_addr));
+ memcpy(src_in, src_addr, rdma_addr_size(src_addr));
} else {
src_in->sa_family = dst_addr->sa_family;
}
- memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
+ memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
req->addr = addr;
req->callback = callback;
req->context = context;
@@ -569,7 +553,6 @@
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
-
ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid, scope_id);
if (ret)
return ret;
@@ -579,6 +562,7 @@
return ret;
memset(&dev_addr, 0, sizeof(dev_addr));
+ dev_addr.bound_dev_if = scope_id; /* XXX scope_id overlaps with if_index */
ctx.addr = &dev_addr;
init_completion(&ctx.comp);
@@ -602,14 +586,17 @@
u32 rdma_get_ipv6_scope_id(struct ib_device *ib, u8 port_num)
{
-#ifdef INET6
struct ifnet *ifp;
+
if (ib->get_netdev == NULL)
return (-1U);
ifp = ib->get_netdev(ib, port_num);
if (ifp == NULL)
return (-1U);
+#if defined(INET6)
return (in6_getscopezone(ifp, IPV6_ADDR_SCOPE_LINKLOCAL));
+#elif defined(INET)
+ return (ifp->if_index);
#else
return (-1U);
#endif
@@ -644,7 +631,7 @@
{
if (event == NETEVENT_NEIGH_UPDATE) {
set_timeout(jiffies);
- }
+ }
return 0;
}
Index: sys/ofed/drivers/infiniband/core/agent.c
===================================================================
--- sys/ofed/drivers/infiniband/core/agent.c
+++ sys/ofed/drivers/infiniband/core/agent.c
@@ -94,14 +94,14 @@
port_priv = ib_get_agent_port(device, port_num);
if (!port_priv) {
- printk(KERN_ERR SPFX "Unable to find port agent\n");
+ dev_err(&device->dev, "Unable to find port agent\n");
return;
}
agent = port_priv->agent[qpn];
ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
if (IS_ERR(ah)) {
- printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n",
+ dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n",
PTR_ERR(ah));
return;
}
@@ -110,7 +110,7 @@
IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_KERNEL);
if (IS_ERR(send_buf)) {
- printk(KERN_ERR SPFX "ib_create_send_mad error\n");
+ dev_err(&device->dev, "ib_create_send_mad error\n");
goto err1;
}
@@ -125,7 +125,7 @@
}
if (ib_post_send_mad(send_buf, NULL)) {
- printk(KERN_ERR SPFX "ib_post_send_mad error\n");
+ dev_err(&device->dev, "ib_post_send_mad error\n");
goto err2;
}
return;
@@ -151,7 +151,7 @@
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
- printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n");
+ dev_err(&device->dev, "No memory for ib_agent_port_private\n");
ret = -ENOMEM;
goto error1;
}
@@ -202,7 +202,7 @@
port_priv = __ib_get_agent_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
- printk(KERN_ERR SPFX "Port %d not found\n", port_num);
+ dev_err(&device->dev, "Port %d not found\n", port_num);
return -ENODEV;
}
list_del(&port_priv->port_list);
Index: sys/ofed/drivers/infiniband/core/cache.c
===================================================================
--- sys/ofed/drivers/infiniband/core/cache.c
+++ sys/ofed/drivers/infiniband/core/cache.c
@@ -42,6 +42,8 @@
#include "core_priv.h"
+#define __IB_ONLY
+
struct ib_pkey_cache {
int table_len;
u16 table[0];
@@ -69,72 +71,239 @@
0 : device->phys_port_cnt;
}
-int ib_get_cached_gid(struct ib_device *device,
- u8 port_num,
- int index,
- union ib_gid *gid)
+static int __IB_ONLY __ib_get_cached_gid(struct ib_device *device,
+ u8 port_num,
+ int index,
+ union ib_gid *gid)
{
struct ib_gid_cache *cache;
unsigned long flags;
- int ret = -EINVAL;
+ int ret = -ENOENT;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
+ if (!device->cache.gid_cache)
+ return -ENOENT;
read_lock_irqsave(&device->cache.lock, flags);
- if (device->cache.gid_cache) {
- cache = device->cache.gid_cache[port_num - start_port(device)];
-
- if (cache && index >= 0 && index < cache->table_len) {
- *gid = cache->table[index];
- ret = 0;
- }
+ cache = device->cache.gid_cache[port_num - start_port(device)];
+ if (cache && index >= 0 && index < cache->table_len) {
+ *gid = cache->table[index];
+ ret = 0;
}
read_unlock_irqrestore(&device->cache.lock, flags);
+ return ret;
+}
+
+int ib_cache_use_roce_gid_cache(struct ib_device *device, u8 port_num)
+{
+ if (rdma_port_get_link_layer(device, port_num) ==
+ IB_LINK_LAYER_ETHERNET) {
+ if (device->cache.roce_gid_cache)
+ return 0;
+ else
+ return -EAGAIN;
+ }
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL(ib_cache_use_roce_gid_cache);
+
+int ib_get_cached_gid(struct ib_device *device,
+ u8 port_num,
+ int index,
+ union ib_gid *gid,
+ struct ib_gid_attr *attr)
+{
+ int ret;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+
+ ret = ib_cache_use_roce_gid_cache(device, port_num);
+ if (!ret)
+ return roce_gid_cache_get_gid(device, port_num, index, gid,
+ attr);
+
+ if (ret == -EAGAIN)
+ return ret;
+
+ ret = __ib_get_cached_gid(device, port_num, index, gid);
+
+ if (!ret && attr) {
+ memset(attr, 0, sizeof(*attr));
+ attr->gid_type = IB_GID_TYPE_IB;
+ }
return ret;
}
EXPORT_SYMBOL(ib_get_cached_gid);
-int ib_find_cached_gid(struct ib_device *device,
- union ib_gid *gid,
- u8 *port_num,
- u16 *index)
+static int __IB_ONLY ___ib_find_cached_gid_by_port(struct ib_device *device,
+ u8 port_num,
+ const union ib_gid *gid,
+ u16 *index)
{
struct ib_gid_cache *cache;
+ u8 p = port_num - start_port(device);
+ int i;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
+ if (!ib_cache_use_roce_gid_cache(device, port_num))
+ return -ENOSYS;
+ if (!device->cache.gid_cache)
+ return -ENOENT;
+
+ cache = device->cache.gid_cache[p];
+ if (!cache)
+ return -ENOENT;
+
+ for (i = 0; i < cache->table_len; ++i) {
+ if (!memcmp(gid, &cache->table[i], sizeof(*gid))) {
+ if (index)
+ *index = i;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static int __IB_ONLY __ib_find_cached_gid_by_port(struct ib_device *device,
+ u8 port_num,
+ union ib_gid *gid,
+ u16 *index)
+{
unsigned long flags;
- int p, i;
+ u16 found_index;
+ int ret;
+
+ if (index)
+ *index = -1;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ ret = ___ib_find_cached_gid_by_port(device, port_num, gid,
+ &found_index);
+
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ if (!ret && index)
+ *index = found_index;
+
+ return ret;
+}
+
+static int __IB_ONLY __ib_find_cached_gid(struct ib_device *device,
+ union ib_gid *gid,
+ u8 *port_num,
+ u16 *index)
+{
+ unsigned long flags;
+ u16 found_index;
+ int p;
int ret = -ENOENT;
- *port_num = -1;
+ if (port_num)
+ *port_num = -1;
if (index)
*index = -1;
read_lock_irqsave(&device->cache.lock, flags);
- if (!device->cache.gid_cache)
- goto out;
- for (p = 0; p <= end_port(device) - start_port(device); ++p) {
- cache = device->cache.gid_cache[p];
- if (!cache)
- continue;
- for (i = 0; i < cache->table_len; ++i) {
- if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
- *port_num = p + start_port(device);
- if (index)
- *index = i;
- ret = 0;
- goto out;
- }
+
+ for (p = start_port(device); p <= end_port(device); ++p) {
+ if (!___ib_find_cached_gid_by_port(device, p, gid,
+ &found_index)) {
+ if (port_num)
+ *port_num = p;
+ ret = 0;
+ break;
}
}
-out:
+
read_unlock_irqrestore(&device->cache.lock, flags);
+
+ if (!ret && index)
+ *index = found_index;
+
+ return ret;
+}
+
+int ib_find_cached_gid(struct ib_device *device,
+ union ib_gid *gid,
+ enum ib_gid_type gid_type,
+ struct net *net,
+ int if_index,
+ u8 *port_num,
+ u16 *index)
+{
+ int ret = -ENOENT;
+
+ /* Look for a RoCE device with the specified GID. */
+ if (device->cache.roce_gid_cache)
+ ret = roce_gid_cache_find_gid(device, gid, gid_type, net,
+ if_index, port_num, index);
+
+ /* If no RoCE devices with the specified GID, look for IB device. */
+ if (ret && gid_type == IB_GID_TYPE_IB)
+ ret = __ib_find_cached_gid(device, gid, port_num, index);
+
return ret;
}
EXPORT_SYMBOL(ib_find_cached_gid);
+int ib_find_cached_gid_by_port(struct ib_device *device,
+ union ib_gid *gid,
+ enum ib_gid_type gid_type,
+ u8 port_num,
+ struct net *net,
+ int if_index,
+ u16 *index)
+{
+ int ret = -ENOENT;
+
+ /* Look for a RoCE device with the specified GID. */
+ if (!ib_cache_use_roce_gid_cache(device, port_num))
+ return roce_gid_cache_find_gid_by_port(device, gid, gid_type,
+ port_num, net, if_index,
+ index);
+
+ /* If no RoCE devices with the specified GID, look for IB device. */
+ if (gid_type == IB_GID_TYPE_IB)
+ ret = __ib_find_cached_gid_by_port(device, port_num,
+ gid, index);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_find_cached_gid_by_port);
+
+int ib_find_gid_by_filter(struct ib_device *device,
+ union ib_gid *gid,
+ u8 port_num,
+ bool (*filter)(const union ib_gid *gid,
+ const struct ib_gid_attr *,
+ void *),
+ void *context, u16 *index)
+{
+ /* Look for a RoCE device with the specified GID. */
+ if (!ib_cache_use_roce_gid_cache(device, port_num))
+ return roce_gid_cache_find_gid_by_filter(device, gid,
+ port_num, filter,
+ context, index);
+
+ /* Only RoCE GID cache supports filter function */
+ if (filter)
+ return -ENOSYS;
+
+ /* If no RoCE devices with the specified GID, look for IB device. */
+ return __ib_find_cached_gid_by_port(device, port_num,
+ gid, index);
+}
+EXPORT_SYMBOL(ib_find_gid_by_filter);
+
int ib_get_cached_pkey(struct ib_device *device,
u8 port_num,
int index,
@@ -142,24 +311,23 @@
{
struct ib_pkey_cache *cache;
unsigned long flags;
- int ret = -EINVAL;
+ int ret = -ENOENT;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ if (!device->cache.pkey_cache)
+ return -ENOENT;
- if (device->cache.pkey_cache) {
- cache = device->cache.pkey_cache[port_num - start_port(device)];
+ read_lock_irqsave(&device->cache.lock, flags);
- if (cache && index >= 0 && index < cache->table_len) {
- *pkey = cache->table[index];
- ret = 0;
- }
+ cache = device->cache.pkey_cache[port_num - start_port(device)];
+ if (cache && index >= 0 && index < cache->table_len) {
+ *pkey = cache->table[index];
+ ret = 0;
}
read_unlock_irqrestore(&device->cache.lock, flags);
-
return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);
@@ -178,17 +346,17 @@
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
- *index = -1;
+ if (!device->cache.pkey_cache)
+ return -ENOENT;
read_lock_irqsave(&device->cache.lock, flags);
- if (!device->cache.pkey_cache)
- goto out;
-
cache = device->cache.pkey_cache[port_num - start_port(device)];
if (!cache)
goto out;
+ *index = -1;
+
for (i = 0; i < cache->table_len; ++i)
if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
if (cache->table[i] & 0x8000) {
@@ -203,6 +371,7 @@
*index = partial_ix;
ret = 0;
}
+
out:
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
@@ -222,17 +391,17 @@
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
- *index = -1;
+ if (!device->cache.pkey_cache)
+ return -ENOENT;
read_lock_irqsave(&device->cache.lock, flags);
- if (!device->cache.pkey_cache)
- goto out;
-
cache = device->cache.pkey_cache[port_num - start_port(device)];
if (!cache)
goto out;
+ *index = -1;
+
for (i = 0; i < cache->table_len; ++i)
if (cache->table[i] == pkey) {
*index = i;
@@ -250,7 +419,7 @@
u8 *lmc)
{
unsigned long flags;
- int ret = -EINVAL;
+ int ret = -ENOENT;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
@@ -271,9 +440,15 @@
{
struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
- struct ib_gid_cache *gid_cache = NULL, *old_gid_cache;
+ struct ib_gid_cache *gid_cache = NULL, *old_gid_cache = NULL;
int i;
int ret;
+ bool use_roce_gid_cache =
+ !ib_cache_use_roce_gid_cache(device,
+ port);
+
+ if (port < start_port(device) || port > end_port(device))
+ return;
if (!(device->cache.pkey_cache && device->cache.gid_cache &&
device->cache.lmc_cache))
@@ -297,12 +472,14 @@
pkey_cache->table_len = tprops->pkey_tbl_len;
- gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len *
- sizeof *gid_cache->table, GFP_KERNEL);
- if (!gid_cache)
- goto err;
+ if (!use_roce_gid_cache) {
+ gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
+ sizeof(*gid_cache->table), GFP_KERNEL);
+ if (!gid_cache)
+ goto err;
- gid_cache->table_len = tprops->gid_tbl_len;
+ gid_cache->table_len = tprops->gid_tbl_len;
+ }
for (i = 0; i < pkey_cache->table_len; ++i) {
ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
@@ -313,22 +490,28 @@
}
}
- for (i = 0; i < gid_cache->table_len; ++i) {
- ret = ib_query_gid(device, port, i, gid_cache->table + i);
- if (ret) {
- printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
- ret, device->name, i);
- goto err;
+ if (!use_roce_gid_cache) {
+ for (i = 0; i < gid_cache->table_len; ++i) {
+ ret = ib_query_gid(device, port, i,
+ gid_cache->table + i, NULL);
+ if (ret) {
+ printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
+ ret, device->name, i);
+ goto err;
+ }
}
}
write_lock_irq(&device->cache.lock);
old_pkey_cache = device->cache.pkey_cache[port - start_port(device)];
- old_gid_cache = device->cache.gid_cache [port - start_port(device)];
+ if (!use_roce_gid_cache)
+ old_gid_cache =
+ device->cache.gid_cache[port - start_port(device)];
device->cache.pkey_cache[port - start_port(device)] = pkey_cache;
- device->cache.gid_cache [port - start_port(device)] = gid_cache;
+ if (!use_roce_gid_cache)
+ device->cache.gid_cache[port - start_port(device)] = gid_cache;
device->cache.lmc_cache[port - start_port(device)] = tprops->lmc;
Index: sys/ofed/drivers/infiniband/core/cm.c
===================================================================
--- sys/ofed/drivers/infiniband/core/cm.c
+++ sys/ofed/drivers/infiniband/core/cm.c
@@ -56,14 +56,7 @@
#include <rdma/ib_cm.h>
#include "cm_msgs.h"
-MODULE_AUTHOR("Sean Hefty");
-MODULE_DESCRIPTION("InfiniBand CM");
-MODULE_LICENSE("Dual BSD/GPL");
-
-#ifdef pr_fmt
-#undef pr_fmt
-#endif
-#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
+/* InfiniBand CM */
static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device);
@@ -88,6 +81,8 @@
__be32 random_id_operand;
struct list_head timewait_list;
struct workqueue_struct *wq;
+ /* sync on cm change port state */
+ spinlock_t state_lock;
} cm;
/* Counter indexes ordered by attribute ID */
@@ -169,6 +164,7 @@
struct ib_mad_agent *mad_agent;
struct kobject port_obj;
u8 port_num;
+ struct list_head cm_priv_list;
struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};
@@ -177,6 +173,7 @@
struct ib_device *ib_device;
struct device *device;
u8 ack_delay;
+ int going_down;
struct cm_port *port[0];
};
@@ -186,8 +183,6 @@
struct ib_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
- u8 valid;
- u8 smac[ETH_ALEN];
};
struct cm_work {
@@ -248,6 +243,10 @@
u8 service_timeout;
u8 target_ack_delay;
+ struct list_head list;
+ /* indicates that the send port mad was unregistered. */
+ int send_port_not_ready;
+
struct list_head work_list;
atomic_t work_count;
};
@@ -266,11 +265,32 @@
struct ib_mad_agent *mad_agent;
struct ib_mad_send_buf *m;
struct ib_ah *ah;
+ int ret = 0;
+ unsigned long flags, flags2;
+
+ /* don't let the port to be released till the agent is down */
+ spin_lock_irqsave(&cm.state_lock, flags2);
+ spin_lock_irqsave(&cm.lock, flags);
+ if (cm_id_priv->send_port_not_ready) {
+ pr_info("%s: not valid CM id\n", __func__);
+ ret = -ENODEV;
+ spin_unlock_irqrestore(&cm.lock, flags);
+ goto out;
+ }
+ spin_unlock_irqrestore(&cm.lock, flags);
+ /* make sure the port didn't release the mad yet.*/
mad_agent = cm_id_priv->av.port->mad_agent;
+ if (mad_agent == NULL) {
+ pr_info("%s: not valid MAD agent\n", __func__);
+ ret = -ENODEV;
+ goto out;
+ }
ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
- if (IS_ERR(ah))
- return PTR_ERR(ah);
+ if (IS_ERR(ah)){
+ ret = PTR_ERR(ah);
+ goto out;
+ }
m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
cm_id_priv->av.pkey_index,
@@ -278,7 +298,8 @@
GFP_ATOMIC);
if (IS_ERR(m)) {
ib_destroy_ah(ah);
- return PTR_ERR(m);
+ ret = PTR_ERR(m);
+ goto out;
}
/* Timeout set by caller if response is expected. */
@@ -288,7 +309,9 @@
atomic_inc(&cm_id_priv->refcount);
m->context[0] = cm_id_priv;
*msg = m;
- return 0;
+out:
+ spin_unlock_irqrestore(&cm.state_lock, flags2);
+ return ret;
}
static int cm_alloc_response_msg(struct cm_port *port,
@@ -357,24 +380,8 @@
grh, &av->ah_attr);
}
-int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac)
-{
- struct cm_id_private *cm_id_priv;
-
- cm_id_priv = container_of(id, struct cm_id_private, id);
-
- if (smac != NULL)
- memcpy(cm_id_priv->av.smac, smac, sizeof(cm_id_priv->av.smac));
-
- if (alt_smac != NULL)
- memcpy(cm_id_priv->alt_av.smac, alt_smac,
- sizeof(cm_id_priv->alt_av.smac));
-
- return 0;
-}
-EXPORT_SYMBOL(ib_update_cm_av);
-
-static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
+static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
+ struct cm_id_private *cm_id_priv)
{
struct cm_device *cm_dev;
struct cm_port *port = NULL;
@@ -385,7 +392,8 @@
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- &p, NULL)) {
+ path->gid_type, path->net,
+ path->ifindex, &p, NULL)) {
port = cm_dev->port[p-1];
break;
}
@@ -404,9 +412,10 @@
ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
&av->ah_attr);
av->timeout = path->packet_life_time + 1;
- memcpy(av->smac, path->smac, sizeof(av->smac));
- av->valid = 1;
+ spin_lock_irqsave(&cm.lock, flags);
+ list_add_tail(&cm_id_priv->list, &port->cm_priv_list);
+ spin_unlock_irqrestore(&cm.lock, flags);
return 0;
}
@@ -746,6 +755,7 @@
spin_lock_init(&cm_id_priv->lock);
init_completion(&cm_id_priv->comp);
INIT_LIST_HEAD(&cm_id_priv->work_list);
+ INIT_LIST_HEAD(&cm_id_priv->list);
atomic_set(&cm_id_priv->work_count, -1);
atomic_set(&cm_id_priv->refcount, 1);
return &cm_id_priv->id;
@@ -831,6 +841,13 @@
{
int wait_time;
unsigned long flags;
+ struct cm_device *cm_dev;
+
+ cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
+ if (!cm_dev) {
+ pr_err("%s Not exists such cm_dev\n", __func__);
+ return;
+ }
spin_lock_irqsave(&cm.lock, flags);
cm_cleanup_timewait(cm_id_priv->timewait_info);
@@ -844,8 +861,14 @@
*/
cm_id_priv->id.state = IB_CM_TIMEWAIT;
wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
- queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
- msecs_to_jiffies(wait_time));
+
+ /* Check if the device started its remove_one */
+ spin_lock_irqsave(&cm.lock, flags);
+ if (!cm_dev->going_down)
+ queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
+ msecs_to_jiffies(wait_time));
+ spin_unlock_irqrestore(&cm.lock, flags);
+
cm_id_priv->timewait_info = NULL;
}
@@ -939,6 +962,11 @@
break;
}
+ spin_lock_irq(&cm.lock);
+ if (!list_empty(&cm_id_priv->list) && (!cm_id_priv->send_port_not_ready))
+ list_del(&cm_id_priv->list);
+ spin_unlock_irq(&cm.lock);
+
cm_free_id(cm_id->local_id);
cm_deref_id(cm_id_priv);
wait_for_completion(&cm_id_priv->comp);
@@ -1058,7 +1086,7 @@
cm_req_set_resp_res(req_msg, param->responder_resources);
cm_req_set_retry_count(req_msg, param->retry_count);
cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
- cm_req_set_srq(req_msg, param->srq);
+ cm_req_set_srq(req_msg, param->srq);
}
if (pri_path->hop_limit <= 1) {
@@ -1161,14 +1189,14 @@
return (PTR_ERR(cm_id_priv->timewait_info));
}
- ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
+ ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, cm_id_priv);
if (!ret && param->alternate_path) {
ret = cm_init_av_by_path(param->alternate_path,
- &cm_id_priv->alt_av);
+ &cm_id_priv->alt_av, cm_id_priv);
}
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- goto error1;
+ goto error1;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1254,6 +1282,7 @@
return ret;
}
+#if 0
static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
__be32 local_qpn, __be32 remote_qpn)
{
@@ -1261,6 +1290,7 @@
((local_ca_guid == remote_ca_guid) &&
(be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
}
+#endif
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
struct ib_sa_path_rec *primary_path,
@@ -1553,6 +1583,8 @@
struct ib_cm_id *cm_id;
struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_req_msg *req_msg;
+ union ib_gid gid;
+ struct ib_gid_attr gid_attr;
int ret;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1592,20 +1624,27 @@
cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
- /* Workarround: path in req_msg doesn't contain MAC, take it from wc */
- memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, 6);
- work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
- ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+ memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
+ ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ cm_id_priv->av.ah_attr.grh.sgid_index,
+ &gid, &gid_attr);
+ if (!ret) {
+ work->path[0].gid_type = gid_attr.gid_type;
+ ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, cm_id_priv);
+ }
if (ret) {
ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num, 0, &work->path[0].sgid);
+ work->port->port_num, 0, &work->path[0].sgid,
+ &gid_attr);
+ work->path[0].gid_type = gid_attr.gid_type;
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid, sizeof work->path[0].sgid,
NULL, 0);
goto rejected;
}
if (req_msg->alt_local_lid) {
- ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
+ ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, cm_id_priv);
if (ret) {
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
@@ -1687,7 +1726,6 @@
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
- pr_debug("cm_id->state: %d\n", cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -1754,7 +1792,6 @@
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
- pr_debug("cm_id->state: %d\n", cm_id->state);
ret = -EINVAL;
goto error;
}
@@ -1859,7 +1896,6 @@
cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
- pr_debug("no cm_id_priv\n");
return -EINVAL;
}
@@ -1873,7 +1909,6 @@
default:
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
- pr_debug("cm_id_priv->id.state: %d\n", cm_id_priv->id.state);
goto error;
}
@@ -1887,7 +1922,6 @@
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
- pr_debug("Failed to insert remote id\n");
goto error;
}
/* Check for a stale connection. */
@@ -1901,7 +1935,6 @@
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
- pr_debug("Stale connection.\n");
goto error;
}
spin_unlock(&cm.lock);
@@ -2042,7 +2075,6 @@
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_ESTABLISHED) {
- pr_debug("cm_id->state: %d\n", cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2112,7 +2144,6 @@
if (cm_id->state != IB_CM_DREQ_RCVD) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
kfree(data);
- pr_debug("cm_id->state(%d) != IB_CM_DREQ_RCVD\n", cm_id->state);
return -EINVAL;
}
@@ -2178,7 +2209,6 @@
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
- pr_debug("no cm_id_priv\n");
return -EINVAL;
}
@@ -2219,7 +2249,6 @@
counter[CM_DREQ_COUNTER]);
goto unlock;
default:
- pr_debug("cm_id_priv->id.state: %d\n", cm_id_priv->id.state);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@@ -2323,7 +2352,6 @@
cm_enter_timewait(cm_id_priv);
break;
default:
- pr_debug("cm_id->state: 0x%x\n", cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2428,13 +2456,12 @@
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
ib_cancel_mad(cm_id_priv->av.port->mad_agent,
cm_id_priv->msg);
- cm_enter_timewait(cm_id_priv);
- break;
+ cm_enter_timewait(cm_id_priv);
+ break;
}
/* fall through */
default:
spin_unlock_irq(&cm_id_priv->lock);
- pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
goto out;
}
@@ -2497,7 +2524,6 @@
break;
}
default:
- pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
goto error1;
}
@@ -2599,7 +2625,6 @@
counter[CM_MRA_COUNTER]);
/* fall through */
default:
- pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
goto out;
}
@@ -2674,7 +2699,7 @@
goto out;
}
- ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
+ ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, cm_id_priv);
if (ret)
goto out;
cm_id_priv->alt_av.timeout =
@@ -2786,7 +2811,7 @@
cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
- if (cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av))
+ if (cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, cm_id_priv))
goto unlock;
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
@@ -2979,10 +3004,7 @@
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-
- spin_lock_irqsave(&cm_id_priv->lock, flags);
-
- ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
+ ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
if (ret)
goto out;
@@ -2999,19 +3021,21 @@
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state == IB_CM_IDLE)
ret = ib_post_send_mad(msg, NULL);
else
ret = -EINVAL;
if (ret) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
goto out;
}
cm_id->state = IB_CM_SIDR_REQ_SENT;
cm_id_priv->msg = msg;
-out:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+out:
return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_req);
@@ -3339,7 +3363,6 @@
ret = cm_timewait_handler(work);
break;
default:
- pr_debug("work->cm_event.event: 0x%x\n", work->cm_event.event);
ret = -EINVAL;
break;
}
@@ -3353,6 +3376,14 @@
struct cm_work *work;
unsigned long flags;
int ret = 0;
+ struct cm_device *cm_dev;
+ int going_down = 0;
+
+ cm_dev = ib_get_client_data(cm_id->device, &cm_client);
+ if (!cm_dev) {
+ pr_err("%s: No such cm_dev\n", __func__);
+ return -ENODEV;
+ }
work = kmalloc(sizeof *work, GFP_ATOMIC);
if (!work)
@@ -3370,7 +3401,6 @@
ret = -EISCONN;
break;
default:
- pr_debug("cm_id->state: 0x%x\n", cm_id->state);
ret = -EINVAL;
break;
}
@@ -3392,7 +3422,19 @@
work->remote_id = cm_id->remote_id;
work->mad_recv_wc = NULL;
work->cm_event.event = IB_CM_USER_ESTABLISHED;
- queue_delayed_work(cm.wq, &work->work, 0);
+
+ /* Check if the device started its remove_one */
+ spin_lock_irqsave(&cm.lock, flags);
+ if (!cm_dev->going_down)
+ queue_delayed_work(cm.wq, &work->work, 0);
+ else
+ going_down = 1;
+ spin_unlock_irqrestore(&cm.lock, flags);
+
+ if (going_down) {
+ kfree(work);
+ return -ENODEV;
+ }
out:
return ret;
}
@@ -3443,6 +3485,7 @@
enum ib_cm_event_type event;
u16 attr_id;
int paths = 0;
+ int going_down = 0;
switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
case CM_REQ_ATTR_ID:
@@ -3501,7 +3544,19 @@
work->cm_event.event = event;
work->mad_recv_wc = mad_recv_wc;
work->port = port;
- queue_delayed_work(cm.wq, &work->work, 0);
+
+ /* Check if the device started its remove_one */
+ spin_lock_irq(&cm.lock);
+ if (!port->cm_dev->going_down)
+ queue_delayed_work(cm.wq, &work->work, 0);
+ else
+ going_down = 1;
+ spin_unlock_irq(&cm.lock);
+
+ if (going_down) {
+ kfree(work);
+ ib_free_recv_mad(mad_recv_wc);
+ }
}
static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
@@ -3533,7 +3588,6 @@
ret = 0;
break;
default:
- pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@@ -3560,31 +3614,6 @@
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
- if (!cm_id_priv->av.valid)
- return -EINVAL;
- if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
- qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
- *qp_attr_mask |= IB_QP_VID;
- }
- if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
- memcpy(qp_attr->smac, cm_id_priv->av.smac,
- sizeof(qp_attr->smac));
- *qp_attr_mask |= IB_QP_SMAC;
- }
- if (cm_id_priv->alt_av.valid) {
- if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
- qp_attr->alt_vlan_id =
- cm_id_priv->alt_av.ah_attr.vlan_id;
- *qp_attr_mask |= IB_QP_ALT_VID;
- }
- if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
- memcpy(qp_attr->alt_smac,
- cm_id_priv->alt_av.smac,
- sizeof(qp_attr->alt_smac));
- *qp_attr_mask |= IB_QP_ALT_SMAC;
- }
- }
-
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
@@ -3606,7 +3635,6 @@
ret = 0;
break;
default:
- pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@@ -3666,7 +3694,6 @@
ret = 0;
break;
default:
- pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@@ -3693,7 +3720,6 @@
ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
break;
default:
- pr_debug("qp_attr->qp_state: 0x%x\n", qp_attr->qp_state);
ret = -EINVAL;
break;
}
@@ -3806,7 +3832,7 @@
struct cm_port *port;
struct ib_mad_reg_req reg_req = {
.mgmt_class = IB_MGMT_CLASS_CM,
- .mgmt_class_version = IB_CM_CLASS_VERSION
+ .mgmt_class_version = IB_CM_CLASS_VERSION,
};
struct ib_port_modify port_modify = {
.set_port_cap_mask = IB_PORT_CM_SUP
@@ -3825,7 +3851,7 @@
cm_dev->ib_device = ib_device;
cm_get_ack_delay(cm_dev);
-
+ cm_dev->going_down = 0;
cm_dev->device = device_create(&cm_class, &ib_device->dev,
MKDEV(0, 0), NULL,
"%s", ib_device->name);
@@ -3844,6 +3870,8 @@
port->cm_dev = cm_dev;
port->port_num = i;
+ INIT_LIST_HEAD(&port->cm_priv_list);
+
ret = cm_create_port_fs(port);
if (ret)
goto error1;
@@ -3895,6 +3923,8 @@
};
unsigned long flags;
int i;
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_agent *cur_mad_agent;
cm_dev = ib_get_client_data(ib_device, &cm_client);
if (!cm_dev)
@@ -3904,11 +3934,31 @@
list_del(&cm_dev->list);
write_unlock_irqrestore(&cm.device_lock, flags);
+ spin_lock_irq(&cm.lock);
+ cm_dev->going_down = 1;
+ spin_unlock_irq(&cm.lock);
+
for (i = 1; i <= ib_device->phys_port_cnt; i++) {
port = cm_dev->port[i-1];
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
- ib_unregister_mad_agent(port->mad_agent);
+ /* mark all the cm_id's as not valid */
+ spin_lock_irq(&cm.lock);
+ list_for_each_entry(cm_id_priv, &port->cm_priv_list, list)
+ cm_id_priv->send_port_not_ready = 1;
+ spin_unlock_irq(&cm.lock);
+
+ /*
+ * We flush the queue here after the going_down set, this
+ * verify that no new works will be queued in the recv handler,
+ * after that we can call the unregister_mad_agent
+ */
flush_workqueue(cm.wq);
+ /* don't free mad_agent if it been used now.*/
+ spin_lock_irq(&cm.state_lock);
+ cur_mad_agent = port->mad_agent;
+ port->mad_agent = NULL;
+ spin_unlock_irq(&cm.state_lock);
+ ib_unregister_mad_agent(cur_mad_agent);
cm_remove_port_fs(port);
}
device_unregister(cm_dev->device);
@@ -3923,6 +3973,7 @@
INIT_LIST_HEAD(&cm.device_list);
rwlock_init(&cm.device_lock);
spin_lock_init(&cm.lock);
+ spin_lock_init(&cm.state_lock);
cm.listen_service_table = RB_ROOT;
cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
cm.remote_id_table = RB_ROOT;
Index: sys/ofed/drivers/infiniband/core/cma.c
===================================================================
--- sys/ofed/drivers/infiniband/core/cma.c
+++ sys/ofed/drivers/infiniband/core/cma.c
@@ -3,7 +3,6 @@
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
* Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
- * Copyright (c) 2016 Chelsio Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -51,16 +50,18 @@
#include <net/tcp.h>
#include <net/ipv6.h>
+#include <netinet6/scope6_var.h>
+
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
+#include <rdma/ib.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>
+#include "core_priv.h"
-MODULE_AUTHOR("Sean Hefty");
-MODULE_DESCRIPTION("Generic RDMA CM Agent");
-MODULE_LICENSE("Dual BSD/GPL");
+/* Generic RDMA CM Agent */
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
@@ -78,7 +79,7 @@
static int unify_tcp_port_space = 1;
module_param(unify_tcp_port_space, int, 0644);
MODULE_PARM_DESC(unify_tcp_port_space, "Unify the host TCP and RDMA port "
- "space allocation (default=1)");
+ "space allocation (default=1)");
static int debug_level = 0;
#define cma_pr(level, priv, format, arg...) \
@@ -123,7 +124,6 @@
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
-static struct workqueue_struct *cma_free_wq;
static DEFINE_IDR(sdp_ps);
static DEFINE_IDR(tcp_ps);
static DEFINE_IDR(udp_ps);
@@ -136,6 +136,8 @@
struct completion comp;
atomic_t refcount;
struct list_head id_list;
+ struct sysctl_ctx_list sysctl_ctx;
+ enum ib_gid_type default_gid_type;
};
struct rdma_bind_list {
@@ -148,6 +150,42 @@
CMA_OPTION_AFONLY,
};
+void cma_ref_dev(struct cma_device *cma_dev)
+{
+ atomic_inc(&cma_dev->refcount);
+}
+
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+ void *cookie)
+{
+ struct cma_device *cma_dev;
+ struct cma_device *found_cma_dev = NULL;
+
+ mutex_lock(&lock);
+
+ list_for_each_entry(cma_dev, &dev_list, list)
+ if (filter(cma_dev->device, cookie)) {
+ found_cma_dev = cma_dev;
+ break;
+ }
+
+ if (found_cma_dev)
+ cma_ref_dev(found_cma_dev);
+ mutex_unlock(&lock);
+ return found_cma_dev;
+}
+
+enum ib_gid_type cma_get_default_gid_type(struct cma_device *cma_dev)
+{
+ return cma_dev->default_gid_type;
+}
+
+void cma_set_default_gid_type(struct cma_device *cma_dev,
+ enum ib_gid_type default_gid_type)
+{
+ cma_dev->default_gid_type = default_gid_type;
+}
+
/*
* Device removal can occur at anytime, so we need extra handling to
* serialize notifying the user of device removal with other callbacks.
@@ -168,13 +206,11 @@
int internal_id;
enum rdma_cm_state state;
spinlock_t lock;
- spinlock_t cm_lock;
struct mutex qp_mutex;
struct completion comp;
atomic_t refcount;
struct mutex handler_mutex;
- struct work_struct work; /* garbage coll */
int backlog;
int timeout_ms;
@@ -194,10 +230,8 @@
u8 tos;
u8 reuseaddr;
u8 afonly;
+ enum ib_gid_type gid_type;
int qp_timeout;
- /* cache for mc record params */
- struct ib_sa_mcmember_rec rec;
- int is_valid_rec;
};
struct cma_multicast {
@@ -209,6 +243,7 @@
void *context;
struct sockaddr_storage addr;
struct kref mcref;
+ bool igmp_joined;
};
struct cma_work {
@@ -222,7 +257,6 @@
struct cma_ndev_work {
struct work_struct work;
struct rdma_id_private *id;
- struct rdma_cm_event event;
};
struct iboe_mcast_work {
@@ -313,6 +347,28 @@
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
+/*
+static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
+{
+ struct in_device *in_dev = NULL;
+
+ if (ndev) {
+ rtnl_lock();
+ in_dev = __in_dev_get_rtnl(ndev);
+ if (in_dev) {
+ if (join)
+ ip_mc_inc_group(in_dev,
+ *(__be32 *)(mgid->raw+12));
+ else
+ ip_mc_dec_group(in_dev,
+ *(__be32 *)(mgid->raw+12));
+ }
+ rtnl_unlock();
+ }
+ return (in_dev) ? 0 : -ENODEV;
+}
+*/
+
static inline u8 sdp_get_majv(u8 sdp_version)
{
return sdp_version >> 4;
@@ -331,15 +387,16 @@
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
- atomic_inc(&cma_dev->refcount);
+ cma_ref_dev(cma_dev);
id_priv->cma_dev = cma_dev;
+ id_priv->gid_type = cma_dev->default_gid_type;
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
}
-static inline void cma_deref_dev(struct cma_device *cma_dev)
+void cma_deref_dev(struct cma_device *cma_dev)
{
if (atomic_dec_and_test(&cma_dev->refcount))
complete(&cma_dev->comp);
@@ -362,16 +419,40 @@
mutex_unlock(&lock);
}
-static int cma_set_qkey(struct rdma_id_private *id_priv)
+static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
+{
+ return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+}
+
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
+{
+ return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
+}
+
+static inline unsigned short cma_family(struct rdma_id_private *id_priv)
+{
+ return id_priv->id.route.addr.src_addr.ss_family;
+}
+
+static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
{
struct ib_sa_mcmember_rec rec;
int ret = 0;
- if (id_priv->qkey)
+ if (id_priv->qkey) {
+ if (qkey && id_priv->qkey != qkey)
+ return -EINVAL;
+ return 0;
+ }
+
+ if (qkey) {
+ id_priv->qkey = qkey;
return 0;
+ }
switch (id_priv->id.ps) {
case RDMA_PS_UDP:
+ case RDMA_PS_IB:
id_priv->qkey = RDMA_UDP_QKEY;
break;
case RDMA_PS_IPOIB:
@@ -400,7 +481,7 @@
return 1;
for (i = 0; i < props.gid_tbl_len; ++i) {
- err = ib_query_gid(device, port_num, i, &tmp);
+ err = ib_query_gid(device, port_num, i, &tmp, NULL);
if (err)
return 1;
if (!memcmp(&tmp, gid, sizeof tmp))
@@ -479,6 +560,27 @@
}
EXPORT_SYMBOL(rdma_find_cmid_laddr);
+static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
+{
+ dev_addr->dev_type = ARPHRD_INFINIBAND;
+ rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
+ ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
+}
+
+static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+{
+ int ret;
+
+ if (addr->sa_family != AF_IB) {
+ ret = rdma_translate_ip(addr, dev_addr, NULL);
+ } else {
+ cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
+ ret = 0;
+ }
+
+ return ret;
+}
+
static int cma_acquire_dev(struct rdma_id_private *id_priv,
struct rdma_id_private *listen_id_priv)
{
@@ -486,7 +588,7 @@
struct cma_device *cma_dev;
union ib_gid gid, iboe_gid;
int ret = -ENODEV;
- u8 port, found_port;
+ u8 port;
enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
@@ -505,16 +607,28 @@
listen_id_priv->id.port_num) == dev_ll) {
cma_dev = listen_id_priv->cma_dev;
port = listen_id_priv->id.port_num;
- if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
- rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
- ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
- &found_port, NULL);
- else
- ret = ib_find_cached_gid(cma_dev->device, &gid,
- &found_port, NULL);
+ if (rdma_node_get_transport(cma_dev->device->node_type) ==
+ RDMA_TRANSPORT_IB &&
+ rdma_port_get_link_layer(cma_dev->device, port) ==
+ IB_LINK_LAYER_ETHERNET) {
+ int if_index =
+ id_priv->id.route.addr.dev_addr.bound_dev_if;
+
+ ret = ib_find_cached_gid_by_port(cma_dev->device,
+ &iboe_gid,
+ cma_dev->default_gid_type,
+ port,
+ &init_net,
+ if_index,
+ NULL);
+ } else {
+ ret = ib_find_cached_gid_by_port(cma_dev->device, &gid,
+ IB_GID_TYPE_IB, port,
+ NULL, 0, NULL);
+ }
- if (!ret && (port == found_port)) {
- id_priv->id.port_num = found_port;
+ if (!ret) {
+ id_priv->id.port_num = port;
goto out;
}
}
@@ -524,18 +638,36 @@
listen_id_priv->cma_dev == cma_dev &&
listen_id_priv->id.port_num == port)
continue;
- if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
- if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
- rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
- ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);
- else
- ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL);
-
- if (!ret && (port == found_port)) {
+ if (rdma_port_get_link_layer(cma_dev->device, port) ==
+ dev_ll) {
+ if (rdma_node_get_transport(cma_dev->device->node_type) ==
+ RDMA_TRANSPORT_IB &&
+ rdma_port_get_link_layer(cma_dev->device, port) ==
+ IB_LINK_LAYER_ETHERNET) {
+ int if_index =
+ id_priv->id.route.addr.dev_addr.bound_dev_if;
+
+ ret = ib_find_cached_gid_by_port(cma_dev->device,
+ &iboe_gid,
+ cma_dev->default_gid_type,
+ port,
+ &init_net,
+ if_index,
+ NULL);
+ } else {
+ ret = ib_find_cached_gid_by_port(cma_dev->device,
+ &gid,
+ IB_GID_TYPE_IB,
+ port,
+ NULL,
+ 0,
+ NULL);
+ }
+
+ if (!ret) {
id_priv->id.port_num = port;
goto out;
- } else if (ret == 1)
- break;
+ }
}
}
}
@@ -548,6 +680,62 @@
return ret;
}
+/*
+ * Select the source IB device and address to reach the destination IB address.
+ */
+static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
+{
+ struct cma_device *cma_dev, *cur_dev;
+ struct sockaddr_ib *addr;
+ union ib_gid gid, sgid, *dgid;
+ u16 pkey, index;
+ u8 p;
+ int i;
+
+ cma_dev = NULL;
+ addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
+ dgid = (union ib_gid *) &addr->sib_addr;
+ pkey = ntohs(addr->sib_pkey);
+
+ list_for_each_entry(cur_dev, &dev_list, list) {
+ if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
+ continue;
+
+ for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+ if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
+ continue;
+
+ for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
+ &gid, NULL);
+ i++) {
+ if (!memcmp(&gid, dgid, sizeof(gid))) {
+ cma_dev = cur_dev;
+ sgid = gid;
+ id_priv->id.port_num = p;
+ goto found;
+ }
+
+ if (!cma_dev && (gid.global.subnet_prefix ==
+ dgid->global.subnet_prefix)) {
+ cma_dev = cur_dev;
+ sgid = gid;
+ id_priv->id.port_num = p;
+ }
+ }
+ }
+ }
+
+ if (!cma_dev)
+ return -ENODEV;
+
+found:
+ cma_attach_to_dev(id_priv, cma_dev);
+ addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
+ memcpy(&addr->sib_addr, &sgid, sizeof sgid);
+ cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
+ return 0;
+}
+
static void cma_deref_id(struct rdma_id_private *id_priv)
{
if (atomic_dec_and_test(&id_priv->refcount))
@@ -582,7 +770,6 @@
id_priv->id.ps = ps;
id_priv->id.qp_type = qp_type;
spin_lock_init(&id_priv->lock);
- spin_lock_init(&id_priv->cm_lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
atomic_set(&id_priv->refcount, 1);
@@ -705,24 +892,12 @@
ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
goto out;
+
ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
- qp_attr.ah_attr.grh.sgid_index, &sgid);
+ qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
if (ret)
goto out;
- if (rdma_node_get_transport(id_priv->cma_dev->device->node_type)
- == RDMA_TRANSPORT_IB &&
- rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)
- == IB_LINK_LAYER_ETHERNET) {
- u32 scope_id = rdma_get_ipv6_scope_id(id_priv->id.device,
- id_priv->id.port_num);
-
- ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL,
- scope_id);
- if (ret)
- goto out;
- }
-
if (conn_param)
qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
@@ -802,7 +977,7 @@
*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
if (id_priv->id.qp_type == IB_QPT_UD) {
- ret = cma_set_qkey(id_priv);
+ ret = cma_set_qkey(id_priv, 0);
if (ret)
return ret;
@@ -829,11 +1004,11 @@
else
ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
qp_attr_mask);
+
if (qp_attr->qp_state == IB_QPS_RTR)
qp_attr->rq_psn = id_priv->seq_num;
break;
case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
if (!id_priv->cm_id.iw) {
qp_attr->qp_access_flags = 0;
*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
@@ -852,32 +1027,37 @@
static inline int cma_zero_addr(struct sockaddr *addr)
{
- struct in6_addr *ip6;
-
- if (addr->sa_family == AF_INET)
- return ipv4_is_zeronet(
- ((struct sockaddr_in *)addr)->sin_addr.s_addr);
- else {
- ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
- return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
- ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
+ switch (addr->sa_family) {
+ case AF_INET:
+ return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
+ case AF_INET6:
+ return IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *) addr)->sin6_addr);
+ case AF_IB:
+ return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
+ default:
+ return 0;
}
}
static inline int cma_loopback_addr(struct sockaddr *addr)
{
- if (addr->sa_family == AF_INET)
- return ipv4_is_loopback(
- ((struct sockaddr_in *) addr)->sin_addr.s_addr);
- else
- return ipv6_addr_loopback(
- &((struct sockaddr_in6 *) addr)->sin6_addr);
+ switch (addr->sa_family) {
+ case AF_INET:
+ return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+ case AF_INET6:
+ return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
+ case AF_IB:
+ return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
+ default:
+ return 0;
+ }
}
static inline int cma_any_addr(struct sockaddr *addr)
{
return cma_zero_addr(addr) || cma_loopback_addr(addr);
}
+
int
rdma_cma_any_addr(struct sockaddr *addr)
{
@@ -894,18 +1074,31 @@
case AF_INET:
return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
((struct sockaddr_in *) dst)->sin_addr.s_addr;
- default:
+ case AF_INET6:
return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
&((struct sockaddr_in6 *) dst)->sin6_addr);
+ default:
+ return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
+ &((struct sockaddr_ib *) dst)->sib_addr);
}
}
-static inline __be16 cma_port(struct sockaddr *addr)
+static __be16 cma_port(struct sockaddr *addr)
{
- if (addr->sa_family == AF_INET)
+ struct sockaddr_ib *sib;
+
+ switch (addr->sa_family) {
+ case AF_INET:
return ((struct sockaddr_in *) addr)->sin_port;
- else
+ case AF_INET6:
return ((struct sockaddr_in6 *) addr)->sin6_port;
+ case AF_IB:
+ sib = (struct sockaddr_ib *) addr;
+ return htons((u16) (be64_to_cpu(sib->sib_sid) &
+ be64_to_cpu(sib->sib_sid_mask)));
+ default:
+ return 0;
+ }
}
static inline int cma_any_port(struct sockaddr *addr)
@@ -913,11 +1106,11 @@
return !cma_port(addr);
}
-static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
+static int cma_get_net_info(void *hdr, struct rdma_cm_id *listen_id,
u8 *ip_ver, __be16 *port,
union cma_ip_addr **src, union cma_ip_addr **dst)
{
- switch (ps) {
+ switch (listen_id->ps) {
case RDMA_PS_SDP:
if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
SDP_MAJ_VERSION)
@@ -932,10 +1125,19 @@
if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
return -EINVAL;
- *ip_ver = cma_get_ip_ver(hdr);
- *port = ((struct cma_hdr *) hdr)->port;
- *src = &((struct cma_hdr *) hdr)->src_addr;
- *dst = &((struct cma_hdr *) hdr)->dst_addr;
+ if (listen_id->route.addr.src_addr.ss_family != AF_IB) {
+ *ip_ver = cma_get_ip_ver(hdr);
+ *port = ((struct cma_hdr *)hdr)->port;
+ *src = &((struct cma_hdr *)hdr)->src_addr;
+ *dst = &((struct cma_hdr *)hdr)->dst_addr;
+ } else {
+ memset(ip_ver, 0, sizeof(*ip_ver));
+ memset(port, 0, sizeof(*port));
+ memset(src, 0, sizeof(*src));
+ memset(dst, 0, sizeof(*dst));
+
+ return 0;
+ }
break;
}
@@ -944,14 +1146,54 @@
return 0;
}
-static void cma_save_net_info(struct rdma_addr *addr,
- struct rdma_addr *listen_addr,
+static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+ struct ib_sa_path_rec *path)
+{
+ struct sockaddr_ib *listen_ib, *ib;
+
+ listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
+ ib = (struct sockaddr_ib *) &id->route.addr.src_addr;
+ ib->sib_family = listen_ib->sib_family;
+ if (path) {
+ ib->sib_pkey = path->pkey;
+ ib->sib_flowinfo = path->flow_label;
+ memcpy(&ib->sib_addr, &path->sgid, 16);
+ } else {
+ ib->sib_pkey = listen_ib->sib_pkey;
+ ib->sib_flowinfo = listen_ib->sib_flowinfo;
+ ib->sib_addr = listen_ib->sib_addr;
+ }
+
+ ib->sib_sid = listen_ib->sib_sid;
+ ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
+ ib->sib_scope_id = listen_ib->sib_scope_id;
+
+ if (path) {
+ ib = (struct sockaddr_ib *) &id->route.addr.dst_addr;
+ ib->sib_family = listen_ib->sib_family;
+ ib->sib_pkey = path->pkey;
+ ib->sib_flowinfo = path->flow_label;
+ memcpy(&ib->sib_addr, &path->dgid, 16);
+ }
+}
+
+static void cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+ struct ib_cm_event *ib_event,
+ struct rdma_addr *addr, struct rdma_addr *listen_addr,
u8 ip_ver, __be16 port,
union cma_ip_addr *src, union cma_ip_addr *dst)
{
struct sockaddr_in *listen4, *ip4;
struct sockaddr_in6 *listen6, *ip6;
+ if (listen_id->route.addr.src_addr.ss_family == AF_IB) {
+ if (ib_event->event == IB_CM_REQ_RECEIVED)
+ cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path);
+ else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
+ cma_save_ib_info(id, listen_id, NULL);
+ return;
+ }
+
switch (ip_ver) {
case 4:
listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
@@ -988,14 +1230,12 @@
}
}
-static inline int cma_user_data_offset(enum rdma_port_space ps)
+static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
{
- switch (ps) {
- case RDMA_PS_SDP:
+ if (id_priv->id.ps == RDMA_PS_SDP)
return 0;
- default:
- return sizeof(struct cma_hdr);
- }
+ else
+ return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
}
static void cma_cancel_route(struct rdma_id_private *id_priv)
@@ -1046,8 +1286,7 @@
cma_cancel_route(id_priv);
break;
case RDMA_CM_LISTEN:
- if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
- && !id_priv->cma_dev)
+ if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
cma_cancel_listens(id_priv);
break;
default:
@@ -1057,22 +1296,20 @@
static void cma_release_port(struct rdma_id_private *id_priv)
{
- struct rdma_bind_list *bind_list;
+ struct rdma_bind_list *bind_list = id_priv->bind_list;
- mutex_lock(&lock);
- bind_list = id_priv->bind_list;
- if (!bind_list) {
- mutex_unlock(&lock);
+ if (!bind_list)
return;
- }
+
+ mutex_lock(&lock);
hlist_del(&id_priv->node);
- id_priv->bind_list = NULL;
if (hlist_empty(&bind_list->owners)) {
idr_remove(bind_list->ps, bind_list->port);
kfree(bind_list);
}
mutex_unlock(&lock);
- if (id_priv->sock)
+
+ if ((id_priv->sock != NULL) && (id_priv->sock->so_count > 0))
sock_release(id_priv->sock);
}
@@ -1090,6 +1327,22 @@
kfree(mc);
break;
case IB_LINK_LAYER_ETHERNET:
+/*
+ if (mc->igmp_joined) {
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(&init_net,
+ dev_addr->bound_dev_if);
+ if (ndev) {
+ cma_igmp_send(ndev,
+ &mc->multicast.ib->rec.mgid,
+ false);
+ dev_put(ndev);
+ }
+ }
+*/
kref_put(&mc->mcref, release_mc);
break;
default:
@@ -1097,26 +1350,11 @@
}
}
}
-static void __rdma_free(struct work_struct *work)
-{
- struct rdma_id_private *id_priv;
- id_priv = container_of(work, struct rdma_id_private, work);
-
- wait_for_completion(&id_priv->comp);
-
- if (id_priv->internal_id)
- cma_deref_id(id_priv->id.context);
-
- kfree(id_priv->id.route.path_rec);
- kfree(id_priv);
-}
void rdma_destroy_id(struct rdma_cm_id *id)
{
struct rdma_id_private *id_priv;
enum rdma_cm_state state;
- unsigned long flags;
- struct ib_cm_id *ib;
id_priv = container_of(id, struct rdma_id_private, id);
state = cma_exch(id_priv, RDMA_CM_DESTROYING);
@@ -1132,17 +1370,10 @@
if (id_priv->cma_dev) {
switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
case RDMA_TRANSPORT_IB:
- spin_lock_irqsave(&id_priv->cm_lock, flags);
- if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) {
- ib = id_priv->cm_id.ib;
- id_priv->cm_id.ib = NULL;
- spin_unlock_irqrestore(&id_priv->cm_lock, flags);
- ib_destroy_cm_id(ib);
- } else
- spin_unlock_irqrestore(&id_priv->cm_lock, flags);
+ if (id_priv->cm_id.ib)
+ ib_destroy_cm_id(id_priv->cm_id.ib);
break;
case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
if (id_priv->cm_id.iw)
iw_destroy_cm_id(id_priv->cm_id.iw);
break;
@@ -1155,8 +1386,13 @@
cma_release_port(id_priv);
cma_deref_id(id_priv);
- INIT_WORK(&id_priv->work, __rdma_free);
- queue_work(cma_free_wq, &id_priv->work);
+ wait_for_completion(&id_priv->comp);
+
+ if (id_priv->internal_id)
+ cma_deref_id(id_priv->id.context);
+
+ kfree(id_priv->id.route.path_rec);
+ kfree(id_priv);
}
EXPORT_SYMBOL(rdma_destroy_id);
@@ -1221,6 +1457,7 @@
(ib_event->event == IB_CM_TIMEWAIT_EXIT &&
cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))
return 0;
+
memset(&event, 0, sizeof event);
switch (ib_event->event) {
case IB_CM_REQ_ERROR:
@@ -1298,7 +1535,7 @@
u8 ip_ver;
int ret;
- if (cma_get_net_info(ib_event->private_data, listen_id->ps,
+ if (cma_get_net_info(ib_event->private_data, listen_id,
&ip_ver, &port, &src, &dst))
return NULL;
@@ -1307,7 +1544,8 @@
if (IS_ERR(id))
return NULL;
- cma_save_net_info(&id->route.addr, &listen_id->route.addr,
+ id_priv = container_of(id, struct rdma_id_private, id);
+ cma_save_net_info(id, listen_id, ib_event, &id->route.addr, &listen_id->route.addr,
ip_ver, port, src, dst);
rt = &id->route;
@@ -1321,19 +1559,17 @@
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
- if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
+ if (cma_any_addr(cma_src_addr(id_priv))) {
rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
} else {
- ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
- &rt->addr.dev_addr, NULL);
+ ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
if (ret)
goto err;
}
rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
- id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = RDMA_CM_CONNECT;
return id_priv;
@@ -1357,22 +1593,21 @@
if (IS_ERR(id))
return NULL;
-
- if (cma_get_net_info(ib_event->private_data, listen_id->ps,
+ if (cma_get_net_info(ib_event->private_data, listen_id,
&ip_ver, &port, &src, &dst))
goto err;
- cma_save_net_info(&id->route.addr, &listen_id->route.addr,
+ id_priv = container_of(id, struct rdma_id_private, id);
+
+ cma_save_net_info(id, listen_id, ib_event, &id->route.addr, &listen_id->route.addr,
ip_ver, port, src, dst);
if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
- ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
- &id->route.addr.dev_addr, NULL);
+ ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr);
if (ret)
goto err;
}
- id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = RDMA_CM_CONNECT;
return id_priv;
err:
@@ -1409,16 +1644,6 @@
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event;
int offset, ret;
- u8 smac[ETH_ALEN];
- u8 alt_smac[ETH_ALEN];
- u8 *psmac = smac;
- u8 *palt_smac = alt_smac;
- int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) ==
- RDMA_TRANSPORT_IB) &&
- (rdma_port_get_link_layer(cm_id->device,
- ib_event->param.req_rcvd.port) ==
- IB_LINK_LAYER_ETHERNET));
- int is_sidr = 0;
listen_id = cm_id->context;
if (!cma_check_req_qp_type(&listen_id->id, ib_event))
@@ -1428,10 +1653,9 @@
return -ECONNABORTED;
memset(&event, 0, sizeof event);
- offset = cma_user_data_offset(listen_id->id.ps);
+ offset = cma_user_data_offset(listen_id);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
- is_sidr = 1;
conn_id = cma_new_udp_id(&listen_id->id, ib_event);
event.param.ud.private_data = ib_event->private_data + offset;
event.param.ud.private_data_len =
@@ -1463,37 +1687,18 @@
ret = conn_id->id.event_handler(&conn_id->id, &event);
if (ret)
goto err3;
-
- if (is_iboe && !is_sidr) {
- u32 scope_id = rdma_get_ipv6_scope_id(cm_id->device,
- ib_event->param.req_rcvd.port);
-
- if (ib_event->param.req_rcvd.primary_path != NULL)
- rdma_addr_find_smac_by_sgid(
- &ib_event->param.req_rcvd.primary_path->sgid,
- psmac, NULL, scope_id);
- else
- psmac = NULL;
- if (ib_event->param.req_rcvd.alternate_path != NULL)
- rdma_addr_find_smac_by_sgid(
- &ib_event->param.req_rcvd.alternate_path->sgid,
- palt_smac, NULL, scope_id);
- else
- palt_smac = NULL;
- }
- /*
- * Acquire mutex to prevent user executing rdma_destroy_id()
- * while we're accessing the cm_id.
- */
- mutex_lock(&lock);
- if (is_iboe && !is_sidr)
- ib_update_cm_av(cm_id, psmac, palt_smac);
- if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) {
+ /*
+ * Acquire mutex to prevent user executing rdma_destroy_id()
+ * while we're accessing the cm_id.
+ */
+ mutex_lock(&lock);
+ if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
+ (conn_id->id.qp_type != IB_QPT_UD)) {
cma_dbg(container_of(&conn_id->id, struct rdma_id_private, id), "sending MRA\n");
- ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
}
- mutex_unlock(&lock);
- mutex_unlock(&conn_id->handler_mutex);
+ mutex_unlock(&lock);
+ mutex_unlock(&conn_id->handler_mutex);
mutex_unlock(&listen_id->handler_mutex);
cma_deref_id(conn_id);
return 0;
@@ -1512,10 +1717,14 @@
return ret;
}
-static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
+__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
{
- return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
+ if (addr->sa_family == AF_IB)
+ return ((struct sockaddr_ib *) addr)->sib_sid;
+
+ return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
}
+EXPORT_SYMBOL(rdma_get_service_id);
static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
struct ib_cm_compare_data *compare)
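rdma_get_service_id() above keeps the existing encoding for IP addresses (port space in bits 16 and up, port in the low 16 bits) and returns sib_sid verbatim for AF_IB. A sketch of the IP-family encoding; the RDMA_PS_TCP value of 0x0106 is the usual one from the rdma_cm headers and is an assumption here:

#include <sys/endian.h>
#include <stdint.h>
#include <stdio.h>

#define SAMPLE_RDMA_PS_TCP	0x0106	/* assumed value of RDMA_PS_TCP */

int
main(void)
{
	uint16_t port = 20049;		/* sample port, host byte order */
	uint64_t service_id;

	/* Matches cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))). */
	service_id = htobe64(((uint64_t)SAMPLE_RDMA_PS_TCP << 16) + port);

	printf("service id: 0x%016jx\n", (uintmax_t)be64toh(service_id));
	/* prints 0x0000000001064e51 */
	return (0);
}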
@@ -1558,7 +1767,7 @@
if (!cma_any_addr(addr)) {
sdp_data->dst_addr.ip6 = ip6_addr;
memset(&sdp_mask->dst_addr.ip6, 0xFF,
- sizeof(sdp_mask->dst_addr.ip6));
+ sizeof(sdp_mask->dst_addr.ip6));
}
} else {
cma_set_ip_ver(cma_data, 6);
@@ -1566,7 +1775,7 @@
if (!cma_any_addr(addr)) {
cma_data->dst_addr.ip6 = ip6_addr;
memset(&cma_mask->dst_addr.ip6, 0xFF,
- sizeof(cma_mask->dst_addr.ip6));
+ sizeof(cma_mask->dst_addr.ip6));
}
}
break;
@@ -1579,8 +1788,9 @@
{
struct rdma_id_private *id_priv = iw_id->context;
struct rdma_cm_event event;
- struct sockaddr_in *sin;
int ret = 0;
+ struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
+ struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
return 0;
@@ -1591,11 +1801,11 @@
event.event = RDMA_CM_EVENT_DISCONNECTED;
break;
case IW_CM_EVENT_CONNECT_REPLY:
- sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
- *sin = iw_event->local_addr;
- sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
- *sin = iw_event->remote_addr;
- switch ((int)iw_event->status) {
+ memcpy(cma_src_addr(id_priv), laddr,
+ rdma_addr_size(laddr));
+ memcpy(cma_dst_addr(id_priv), raddr,
+ rdma_addr_size(raddr));
+ switch (iw_event->status) {
case 0:
event.event = RDMA_CM_EVENT_ESTABLISHED;
event.param.conn.initiator_depth = iw_event->ird;
@@ -1644,11 +1854,11 @@
{
struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
- struct sockaddr_in *sin;
- struct net_device *dev = NULL;
struct rdma_cm_event event;
int ret;
struct ib_device_attr attr;
+ struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
+ struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
listen_id = cm_id->context;
if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
@@ -1666,14 +1876,7 @@
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
- dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
- if (!dev) {
- ret = -EADDRNOTAVAIL;
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
- }
- ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
+ ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@@ -1691,10 +1894,8 @@
cm_id->context = conn_id;
cm_id->cm_handler = cma_iw_handler;
- sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
- *sin = iw_event->local_addr;
- sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
- *sin = iw_event->remote_addr;
+ memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
+ memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
ret = ib_query_device(conn_id->id.device, &attr);
if (ret) {
@@ -1730,8 +1931,6 @@
cma_deref_id(conn_id);
out:
- if (dev)
- dev_put(dev);
mutex_unlock(&listen_id->handler_mutex);
return ret;
}
@@ -1750,8 +1949,8 @@
id_priv->cm_id.ib = id;
- addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
- svc_id = cma_get_service_id(id_priv->id.ps, addr);
+ addr = cma_src_addr(id_priv);
+ svc_id = rdma_get_service_id(&id_priv->id, addr);
if (cma_any_addr(addr) && !id_priv->afonly)
ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
else {
@@ -1770,20 +1969,19 @@
static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
{
int ret;
- struct sockaddr_in *sin;
struct iw_cm_id *id;
id = iw_create_cm_id(id_priv->id.device,
- id_priv->sock,
- iw_conn_req_handler,
- id_priv);
+ id_priv->sock,
+ iw_conn_req_handler,
+ id_priv);
if (IS_ERR(id))
return PTR_ERR(id);
id_priv->cm_id.iw = id;
- sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
- id_priv->cm_id.iw->local_addr = *sin;
+ memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
+ rdma_addr_size(cma_src_addr(id_priv)));
ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
@@ -1812,6 +2010,10 @@
struct rdma_cm_id *id;
int ret;
+ if (cma_family(id_priv) == AF_IB &&
+ rdma_node_get_transport(cma_dev->device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
id_priv->id.qp_type);
if (IS_ERR(id))
@@ -1821,8 +2023,8 @@
dev_id_priv->state = RDMA_CM_ADDR_BOUND;
dev_id_priv->sock = id_priv->sock;
- memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
- ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
+ memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
+ rdma_addr_size(cma_src_addr(id_priv)));
cma_attach_to_dev(dev_id_priv, cma_dev);
list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
@@ -1888,36 +2090,44 @@
static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct cma_work *work)
{
- struct rdma_addr *addr = &id_priv->id.route.addr;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct ib_sa_path_rec path_rec;
ib_sa_comp_mask comp_mask;
struct sockaddr_in6 *sin6;
+ struct sockaddr_ib *sib;
memset(&path_rec, 0, sizeof path_rec);
- rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
- rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
- path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
+ rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
+ rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
+ path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
- path_rec.service_id = cma_get_service_id(id_priv->id.ps,
- (struct sockaddr *) &addr->dst_addr);
+ path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
- if (addr->src_addr.ss_family == AF_INET) {
+ switch (cma_family(id_priv)) {
+ case AF_INET:
path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
- } else {
- sin6 = (struct sockaddr_in6 *) &addr->src_addr;
+ break;
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
+ break;
+ case AF_IB:
+ sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
+ path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20);
+ comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
+ break;
}
id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
id_priv->id.port_num, &path_rec,
- comp_mask, timeout_ms,
+ comp_mask, timeout_ms, 0,
GFP_KERNEL, cma_query_handler,
work, &id_priv->query);
@@ -1946,10 +2156,48 @@
kfree(work);
}
-static void cma_ndev_work_handler(struct work_struct *_work)
+static int cma_remove_id_dev(struct rdma_id_private *id_priv)
+{
+ struct rdma_cm_event event;
+ enum rdma_cm_state state;
+ int ret = 0;
+
+ /* Record that we want to remove the device */
+ state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
+ if (state == RDMA_CM_DESTROYING)
+ return 0;
+
+ cma_cancel_operation(id_priv, state);
+ mutex_lock(&id_priv->handler_mutex);
+
+ /* Check for destruction from another callback. */
+ if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
+ goto out;
+
+ memset(&event, 0, sizeof(event));
+ event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
+ ret = id_priv->id.event_handler(&id_priv->id, &event);
+out:
+ mutex_unlock(&id_priv->handler_mutex);
+ return ret;
+}
+
+
+static void cma_ndev_device_remove_work_handler(struct work_struct *_work)
+{
+ struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
+ struct rdma_id_private *id_priv = work->id;
+
+ cma_remove_id_dev(id_priv);
+ cma_deref_id(id_priv);
+}
+
+/* Used for BONDING
+static void cma_ndev_addr_change_work_handler(struct work_struct *_work)
{
struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
struct rdma_id_private *id_priv = work->id;
+ struct rdma_cm_event event;
int destroy = 0;
mutex_lock(&id_priv->handler_mutex);
@@ -1957,7 +2205,9 @@
id_priv->state == RDMA_CM_DEVICE_REMOVAL)
goto out;
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+ memset(&event, 0, sizeof(event));
+ event.event = RDMA_CM_EVENT_ADDR_CHANGE;
+ if (id_priv->id.event_handler(&id_priv->id, &event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
@@ -1969,6 +2219,7 @@
rdma_destroy_id(&id_priv->id);
kfree(work);
}
+*/
static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
{
@@ -2053,20 +2304,37 @@
return def_prec2sl & 7;
}
-static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
+/* eb072c4b8da0ba87bc870c7911aae180bae34d4a
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
{
- struct rdma_route *route = &id_priv->id.route;
- struct rdma_addr *addr = &route->addr;
- struct cma_work *work;
+ int prio;
+ struct net_device *dev;
+
+ prio = rt_tos2priority(tos);
+ dev = ndev->priv_flags & IFF_802_1Q_VLAN ?
+ vlan_dev_real_dev(ndev) : ndev;
+
+ if (netdev_get_num_tc(dev))
+ return netdev_get_prio_tc_map(dev, prio);
+
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ if (ndev->priv_flags & IFF_802_1Q_VLAN)
+ return (vlan_dev_get_egress_qos_mask(ndev, prio) &
+ VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+#endif
+ return 0;
+}
+*/
+
+static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
+{
+ struct rdma_route *route = &id_priv->id.route;
+ struct rdma_addr *addr = &route->addr;
+ struct cma_work *work;
int ret;
- struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
- struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
struct net_device *ndev = NULL;
- if (src_addr->sin_family != dst_addr->sin_family)
- return -EINVAL;
-
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
@@ -2082,35 +2350,42 @@
route->num_paths = 1;
- if (addr->dev_addr.bound_dev_if)
+ if (addr->dev_addr.bound_dev_if) {
ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+ route->path_rec->net = &init_net;
+ route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
+ route->path_rec->gid_type = id_priv->gid_type;
+ }
if (!ndev) {
ret = -ENODEV;
goto err2;
}
- route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
- memcpy(route->path_rec->smac, IF_LLADDR(ndev), ndev->if_addrlen);
-
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&route->path_rec->sgid);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
&route->path_rec->dgid);
- route->path_rec->hop_limit = 1;
+ /* Use the hint from the IP stack to select the hop limit */
+ if (addr->dev_addr.network != RDMA_NETWORK_IB) {
+ route->path_rec->hop_limit = IPV6_DEFAULT_HOPLIMIT;
+ } else {
+ route->path_rec->hop_limit = 1;
+ }
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
+ /* route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos); (eb072c4b8da0ba87bc870c7911aae180bae34d4a) */
route->path_rec->sl = tos_to_sl(id_priv->tos);
-
route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu);
route->path_rec->rate_selector = IB_SA_EQ;
route->path_rec->rate = iboe_get_rate(ndev);
dev_put(ndev);
route->path_rec->packet_life_time_selector = IB_SA_EQ;
route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+ route->path_rec->traffic_class = id_priv->tos;
if (!route->path_rec->mtu) {
ret = -EINVAL;
goto err2;
@@ -2157,7 +2432,6 @@
}
break;
case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
ret = cma_resolve_iw_route(id_priv, timeout_ms);
break;
default:
@@ -2175,6 +2449,23 @@
}
EXPORT_SYMBOL(rdma_resolve_route);
+static void cma_set_loopback(struct sockaddr *addr)
+{
+ switch (addr->sa_family) {
+ case AF_INET:
+ ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ break;
+ case AF_INET6:
+ ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr,
+ 0, 0, 0, htonl(1));
+ break;
+ default:
+ ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr,
+ 0, 0, 0, htonl(1));
+ break;
+ }
+}
+
int rdma_enable_apm(struct rdma_cm_id *id, enum alt_path_type alt_type)
{
/* APM is not supported yet */
@@ -2184,29 +2475,41 @@
static int cma_bind_loopback(struct rdma_id_private *id_priv)
{
- struct cma_device *cma_dev;
+ struct cma_device *cma_dev, *cur_dev;
struct ib_port_attr port_attr;
union ib_gid gid;
u16 pkey;
int ret;
u8 p;
+ cma_dev = NULL;
mutex_lock(&lock);
- if (list_empty(&dev_list)) {
+ list_for_each_entry(cur_dev, &dev_list, list) {
+ if (cma_family(id_priv) == AF_IB &&
+ rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
+ continue;
+
+ if (!cma_dev)
+ cma_dev = cur_dev;
+
+ for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+ if (!ib_query_port(cur_dev->device, p, &port_attr) &&
+ port_attr.state == IB_PORT_ACTIVE) {
+ cma_dev = cur_dev;
+ goto port_found;
+ }
+ }
+ }
+
+ if (!cma_dev) {
ret = -ENODEV;
goto out;
}
- list_for_each_entry(cma_dev, &dev_list, list)
- for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
- if (!ib_query_port(cma_dev->device, p, &port_attr) &&
- port_attr.state == IB_PORT_ACTIVE)
- goto port_found;
p = 1;
- cma_dev = list_entry(dev_list.next, struct cma_device, list);
port_found:
- ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
+ ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
if (ret)
goto out;
@@ -2222,6 +2525,7 @@
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
+ cma_set_loopback(cma_src_addr(id_priv));
out:
mutex_unlock(&lock);
return ret;
@@ -2239,8 +2543,7 @@
RDMA_CM_ADDR_RESOLVED))
goto out;
- memcpy(&id_priv->id.route.addr.src_addr, src_addr,
- ip_addr_size(src_addr));
+ memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
if (!status && !id_priv->cma_dev)
status = cma_acquire_dev(id_priv, NULL);
@@ -2268,7 +2571,6 @@
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
{
struct cma_work *work;
- struct sockaddr *src, *dst;
union ib_gid gid;
int ret;
@@ -2285,18 +2587,6 @@
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
- src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
- if (cma_zero_addr(src)) {
- dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
- if ((src->sa_family = dst->sa_family) == AF_INET) {
- ((struct sockaddr_in *)src)->sin_addr =
- ((struct sockaddr_in *)dst)->sin_addr;
- } else {
- ((struct sockaddr_in6 *)src)->sin6_addr =
- ((struct sockaddr_in6 *)dst)->sin6_addr;
- }
- }
-
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
work->old_state = RDMA_CM_ADDR_QUERY;
@@ -2309,15 +2599,23 @@
return ret;
}
-static int cma_resolve_scif(struct rdma_id_private *id_priv)
+static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
{
struct cma_work *work;
+ int ret;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
- /* we probably can leave it empty here */
+ if (!id_priv->cma_dev) {
+ ret = cma_resolve_ib_dev(id_priv);
+ if (ret)
+ goto err;
+ }
+
+ rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
+ &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
@@ -2326,6 +2624,9 @@
work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
queue_work(cma_wq, &work->work);
return 0;
+err:
+ kfree(work);
+ return ret;
}
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@@ -2334,12 +2635,13 @@
if (!src_addr || !src_addr->sa_family) {
src_addr = (struct sockaddr *) &id->route.addr.src_addr;
src_addr->sa_family = dst_addr->sa_family;
-#ifdef INET6
if (dst_addr->sa_family == AF_INET6) {
((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
+ } else if (dst_addr->sa_family == AF_IB) {
+ ((struct sockaddr_ib *) src_addr)->sib_pkey =
+ ((struct sockaddr_ib *) dst_addr)->sib_pkey;
}
-#endif
}
if (!cma_any_addr(src_addr))
return rdma_bind_addr(id, src_addr);
@@ -2352,6 +2654,7 @@
#ifdef INET6
struct sockaddr_in6 in6;
#endif
+ struct sockaddr_ib ib;
} addr;
#endif
@@ -2359,19 +2662,25 @@
#ifdef INET
case AF_INET:
memset(&addr.in, 0, sizeof(addr.in));
- addr.in.sin_family = dst_addr->sa_family;
+ addr.in.sin_family = AF_INET;
addr.in.sin_len = sizeof(addr.in);
return rdma_bind_addr(id, (struct sockaddr *)&addr.in);
#endif
#ifdef INET6
case AF_INET6:
memset(&addr.in6, 0, sizeof(addr.in6));
- addr.in6.sin6_family = dst_addr->sa_family;
+ addr.in6.sin6_family = AF_INET6;
addr.in6.sin6_len = sizeof(addr.in6);
addr.in6.sin6_scope_id =
((struct sockaddr_in6 *)dst_addr)->sin6_scope_id;
return rdma_bind_addr(id, (struct sockaddr *)&addr.in6);
#endif
+ case AF_IB:
+ memset(&addr.ib, 0, sizeof(addr.ib));
+ addr.ib.sib_family = AF_IB;
+ addr.ib.sib_pkey =
+ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
+ return rdma_bind_addr(id, (struct sockaddr *)&addr.ib);
default:
return -EINVAL;
}
@@ -2391,20 +2700,25 @@
return ret;
}
+ if (cma_family(id_priv) != dst_addr->sa_family)
+ return -EINVAL;
+
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
return -EINVAL;
atomic_inc(&id_priv->refcount);
- memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
- if (cma_any_addr(dst_addr))
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
+ if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
- else if (id_priv->id.device &&
- rdma_node_get_transport(id_priv->id.device->node_type) == RDMA_TRANSPORT_SCIF)
- ret = cma_resolve_scif(id_priv);
- else
- ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr,
- dst_addr, &id->route.addr.dev_addr,
- timeout_ms, addr_handler, id_priv);
+ } else {
+ if (dst_addr->sa_family == AF_IB) {
+ ret = cma_resolve_ib_addr(id_priv);
+ } else {
+ ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
+ dst_addr, &id->route.addr.dev_addr,
+ timeout_ms, addr_handler, id_priv);
+ }
+ }
if (ret)
goto err;
@@ -2424,7 +2738,7 @@
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irqsave(&id_priv->lock, flags);
- if (id_priv->state == RDMA_CM_IDLE) {
+ if (reuse || id_priv->state == RDMA_CM_IDLE) {
id_priv->reuseaddr = reuse;
ret = 0;
} else {
@@ -2458,10 +2772,29 @@
static void cma_bind_port(struct rdma_bind_list *bind_list,
struct rdma_id_private *id_priv)
{
- struct sockaddr_in *sin;
+ struct sockaddr *addr;
+ struct sockaddr_ib *sib;
+ u64 sid, mask;
+ __be16 port;
+
+ addr = cma_src_addr(id_priv);
+ port = htons(bind_list->port);
- sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
- sin->sin_port = htons(bind_list->port);
+ switch (addr->sa_family) {
+ case AF_INET:
+ ((struct sockaddr_in *) addr)->sin_port = port;
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *) addr)->sin6_port = port;
+ break;
+ case AF_IB:
+ sib = (struct sockaddr_ib *) addr;
+ sid = be64_to_cpu(sib->sib_sid);
+ mask = be64_to_cpu(sib->sib_sid_mask);
+ sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port));
+ sib->sib_sid_mask = cpu_to_be64(~0ULL);
+ break;
+ }
id_priv->bind_list = bind_list;
hlist_add_head(&id_priv->node, &bind_list->owners);
}
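The AF_IB branch of cma_bind_port() above folds the allocated port into the low 16 bits of the masked service ID and then pins the mask to all ones. A small userland sketch of that update with sample values only:

#include <sys/endian.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Sample sockaddr_ib fields before binding: port bits wildcarded. */
	uint64_t sib_sid = htobe64(0x0000000001060000ULL);
	uint64_t sib_sid_mask = htobe64(0xffffffffffff0000ULL);
	uint16_t port = htons(4791);	/* port chosen by the bind list */

	uint64_t sid = be64toh(sib_sid);
	uint64_t mask = be64toh(sib_sid_mask);

	/* Same update as cma_bind_port(): fold the port in, widen the mask. */
	sib_sid = htobe64((sid & mask) | (uint64_t)ntohs(port));
	sib_sid_mask = htobe64(~0ULL);

	printf("bound sid: 0x%016jx\n", (uintmax_t)be64toh(sib_sid));
	return (0);
}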
@@ -2520,7 +2853,7 @@
last_used_port = rover;
if (ret != -EADDRNOTAVAIL)
return ret;
- }
+ }
if (--remaining) {
rover++;
if ((rover < low) || (rover > high))
@@ -2542,7 +2875,7 @@
struct rdma_id_private *cur_id;
struct sockaddr *addr, *cur_addr;
- addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+ addr = cma_src_addr(id_priv);
hlist_for_each_entry(cur_id, &bind_list->owners, node) {
if (id_priv == cur_id)
continue;
@@ -2551,7 +2884,7 @@
cur_id->reuseaddr)
continue;
- cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr;
+ cur_addr = cma_src_addr(cur_id);
if (id_priv->afonly && cur_id->afonly &&
(addr->sa_family != cur_addr->sa_family))
continue;
@@ -2571,7 +2904,7 @@
unsigned short snum;
int ret;
- snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr));
+ snum = ntohs(cma_port(cma_src_addr(id_priv)));
bind_list = idr_find(ps, snum);
if (!bind_list) {
@@ -2608,7 +2941,7 @@
#ifdef __linux__
ret = sock->ops->bind(sock,
(struct sockaddr *) &id_priv->id.route.addr.src_addr,
- ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
+ rdma_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
#else
ret = -sobind(sock,
(struct sockaddr *)&id_priv->id.route.addr.src_addr,
@@ -2619,7 +2952,7 @@
return ret;
}
- size = ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr);
+ size = rdma_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr);
ret = sock_getname(sock,
(struct sockaddr *) &id_priv->id.route.addr.src_addr,
&size, 0);
@@ -2632,43 +2965,81 @@
return 0;
}
-static int cma_get_port(struct rdma_id_private *id_priv)
+static struct idr *cma_select_inet_ps(struct rdma_id_private *id_priv)
{
- struct idr *ps;
int ret;
switch (id_priv->id.ps) {
case RDMA_PS_SDP:
- ps = &sdp_ps;
- break;
+ return &sdp_ps;
case RDMA_PS_TCP:
- ps = &tcp_ps;
if (unify_tcp_port_space) {
ret = cma_get_tcp_port(id_priv);
if (ret)
- goto out;
+ return NULL;
}
- break;
+ return &tcp_ps;
case RDMA_PS_UDP:
- ps = &udp_ps;
- break;
+ return &udp_ps;
case RDMA_PS_IPOIB:
- ps = &ipoib_ps;
- break;
+ return &ipoib_ps;
case RDMA_PS_IB:
- ps = &ib_ps;
- break;
+ return &ib_ps;
default:
- return -EPROTONOSUPPORT;
+ return NULL;
}
+}
+
+static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv)
+{
+ struct idr *ps = NULL;
+ struct sockaddr_ib *sib;
+ u64 sid_ps, mask, sid;
+
+ sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
+ mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK;
+ sid = be64_to_cpu(sib->sib_sid) & mask;
+
+ if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) {
+ sid_ps = RDMA_IB_IP_PS_IB;
+ ps = &ib_ps;
+ } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) &&
+ (sid == (RDMA_IB_IP_PS_TCP & mask))) {
+ sid_ps = RDMA_IB_IP_PS_TCP;
+ ps = &tcp_ps;
+ } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) &&
+ (sid == (RDMA_IB_IP_PS_UDP & mask))) {
+ sid_ps = RDMA_IB_IP_PS_UDP;
+ ps = &udp_ps;
+ }
+
+ if (ps) {
+ sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib)));
+ sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK |
+ be64_to_cpu(sib->sib_sid_mask));
+ }
+ return ps;
+}
+
+static int cma_get_port(struct rdma_id_private *id_priv)
+{
+ struct idr *ps;
+ int ret;
+
+ if (cma_family(id_priv) != AF_IB)
+ ps = cma_select_inet_ps(id_priv);
+ else
+ ps = cma_select_ib_ps(id_priv);
+ if (!ps)
+ return -EPROTONOSUPPORT;
mutex_lock(&lock);
- if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr))
+ if (cma_any_port(cma_src_addr(id_priv)))
ret = cma_alloc_any_port(ps, id_priv);
else
ret = cma_use_port(ps, id_priv);
mutex_unlock(&lock);
-out:
+
return ret;
}
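cma_select_ib_ps() above keys the port-space choice off the upper bits of the AF_IB service ID. A sketch of that comparison; the RDMA_IB_IP_PS_* constants shown are the values used by the upstream rdma_cm headers and should be treated as assumptions:

#include <stdint.h>
#include <stdio.h>

#define SAMPLE_IB_IP_PS_MASK	0xFFFFFFFFFFFF0000ULL
#define SAMPLE_IB_IP_PS_TCP	0x0000000001060000ULL
#define SAMPLE_IB_IP_PS_UDP	0x0000000001110000ULL

int
main(void)
{
	/* Host-order copies of sib_sid and sib_sid_mask. */
	uint64_t sid_full = SAMPLE_IB_IP_PS_TCP | 20049;
	uint64_t mask = ~0ULL & SAMPLE_IB_IP_PS_MASK;
	uint64_t sid = sid_full & mask;

	if (sid == (SAMPLE_IB_IP_PS_TCP & mask))
		printf("AF_IB sid selects the TCP port space\n");
	else if (sid == (SAMPLE_IB_IP_PS_UDP & mask))
		printf("AF_IB sid selects the UDP port space\n");
	else
		printf("native IB service id\n");
	return (0);
}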
@@ -2676,17 +3047,20 @@
struct sockaddr *addr)
{
#if defined(INET6)
- struct sockaddr_in6 *sin6;
+ struct sockaddr_in6 sin6;
if (addr->sa_family != AF_INET6)
return 0;
- sin6 = (struct sockaddr_in6 *) addr;
- if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) &&
- !sin6->sin6_scope_id)
- return -EINVAL;
+ sin6 = *(struct sockaddr_in6 *)addr;
- dev_addr->bound_dev_if = sin6->sin6_scope_id;
+ if (!(IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)))
+ return 0;
+
+ if (sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0)
+ return -EINVAL;
+
+ dev_addr->bound_dev_if = sin6.sin6_scope_id;
#endif
return 0;
}
@@ -2698,8 +3072,8 @@
id_priv = container_of(id, struct rdma_id_private, id);
if (id_priv->state == RDMA_CM_IDLE) {
- ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
- ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
+ id->route.addr.src_addr.ss_family = AF_INET;
+ ret = rdma_bind_addr(id, cma_src_addr(id_priv));
if (ret)
return ret;
}
@@ -2722,7 +3096,6 @@
goto err;
break;
case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
ret = cma_iw_listen(id_priv, backlog);
if (ret)
goto err;
@@ -2746,12 +3119,11 @@
{
struct rdma_id_private *id_priv;
int ret;
-#if defined(INET6)
int ipv6only;
size_t var_size = sizeof(int);
-#endif
- if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
+ addr->sa_family != AF_IB)
return -EAFNOSUPPORT;
id_priv = container_of(id, struct rdma_id_private, id);
@@ -2762,9 +3134,9 @@
if (ret)
goto err1;
- memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
+ memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
if (!cma_any_addr(addr)) {
- ret = rdma_translate_ip(addr, &id->route.addr.dev_addr, NULL);
+ ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
if (ret)
goto err1;
@@ -2778,8 +3150,9 @@
id_priv->afonly = 1;
#if defined(INET6)
else if (addr->sa_family == AF_INET6)
- id_priv->afonly = kernel_sysctlbyname(&thread0, "net.inet6.ip6.v6only",
- &ipv6only, &var_size, NULL, 0, NULL, 0);
+ id_priv->afonly = kernel_sysctlbyname(&thread0,
+ "net.inet6.ip6.v6only", &ipv6only, &var_size,
+ NULL, 0, NULL, 0);
#endif
}
ret = cma_get_port(id_priv);
@@ -2796,20 +3169,20 @@
}
EXPORT_SYMBOL(rdma_bind_addr);
-static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
- struct rdma_route *route)
+static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
{
struct cma_hdr *cma_hdr;
struct sdp_hh *sdp_hdr;
- if (route->addr.src_addr.ss_family == AF_INET) {
+ cma_hdr = hdr;
+ cma_hdr->cma_version = CMA_VERSION;
+ if (cma_family(id_priv) == AF_INET) {
struct sockaddr_in *src4, *dst4;
- src4 = (struct sockaddr_in *) &route->addr.src_addr;
- dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
+ src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
+ dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
- switch (ps) {
- case RDMA_PS_SDP:
+ if (id_priv->id.ps == RDMA_PS_SDP) {
sdp_hdr = hdr;
if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
return -EINVAL;
@@ -2817,24 +3190,19 @@
sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
sdp_hdr->port = src4->sin_port;
- break;
- default:
- cma_hdr = hdr;
- cma_hdr->cma_version = CMA_VERSION;
+ } else {
cma_set_ip_ver(cma_hdr, 4);
cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
cma_hdr->port = src4->sin_port;
- break;
}
- } else {
+ } else if (cma_family(id_priv) == AF_INET6) {
struct sockaddr_in6 *src6, *dst6;
- src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
- dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
+ src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
+ dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
- switch (ps) {
- case RDMA_PS_SDP:
+ if (id_priv->id.ps == RDMA_PS_SDP) {
sdp_hdr = hdr;
if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
return -EINVAL;
@@ -2842,15 +3210,11 @@
sdp_hdr->src_addr.ip6 = src6->sin6_addr;
sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
sdp_hdr->port = src6->sin6_port;
- break;
- default:
- cma_hdr = hdr;
- cma_hdr->cma_version = CMA_VERSION;
+ } else {
cma_set_ip_ver(cma_hdr, 6);
cma_hdr->src_addr.ip6 = src6->sin6_addr;
cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
cma_hdr->port = src6->sin6_port;
- break;
}
}
return 0;
@@ -2881,15 +3245,10 @@
event.status = ib_event->param.sidr_rep_rcvd.status;
break;
}
- ret = cma_set_qkey(id_priv);
+ ret = cma_set_qkey(id_priv, rep->qkey);
if (ret) {
event.event = RDMA_CM_EVENT_ADDR_ERROR;
- event.status = -EINVAL;
- break;
- }
- if (id_priv->qkey != rep->qkey) {
- event.event = RDMA_CM_EVENT_UNREACHABLE;
- event.status = -EINVAL;
+ event.status = ret;
break;
}
ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
@@ -2924,27 +3283,34 @@
struct rdma_conn_param *conn_param)
{
struct ib_cm_sidr_req_param req;
- struct rdma_route *route;
struct ib_cm_id *id;
- int ret;
+ void *private_data;
+ int offset, ret;
- req.private_data_len = sizeof(struct cma_hdr) +
- conn_param->private_data_len;
+ memset(&req, 0, sizeof req);
+ offset = cma_user_data_offset(id_priv);
+ req.private_data_len = offset + conn_param->private_data_len;
if (req.private_data_len < conn_param->private_data_len)
return -EINVAL;
- req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
- if (!req.private_data)
- return -ENOMEM;
+ if (req.private_data_len) {
+ private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
+ if (!private_data)
+ return -ENOMEM;
+ } else {
+ private_data = NULL;
+ }
if (conn_param->private_data && conn_param->private_data_len)
- memcpy((void *) req.private_data + sizeof(struct cma_hdr),
- conn_param->private_data, conn_param->private_data_len);
+ memcpy(private_data + offset, conn_param->private_data,
+ conn_param->private_data_len);
- route = &id_priv->id.route;
- ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
- if (ret)
- goto out;
+ if (private_data) {
+ ret = cma_format_hdr(private_data, id_priv);
+ if (ret)
+ goto out;
+ req.private_data = private_data;
+ }
id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
id_priv);
@@ -2954,9 +3320,8 @@
}
id_priv->cm_id.ib = id;
- req.path = route->path_rec;
- req.service_id = cma_get_service_id(id_priv->id.ps,
- (struct sockaddr *) &route->addr.dst_addr);
+ req.path = id_priv->id.route.path_rec;
+ req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
req.timeout_ms = 1 << (cma_response_timeout - 8);
req.max_cm_retries = CMA_MAX_CM_RETRIES;
@@ -2967,7 +3332,7 @@
id_priv->cm_id.ib = NULL;
}
out:
- kfree(req.private_data);
+ kfree(private_data);
return ret;
}
@@ -2981,14 +3346,18 @@
int offset, ret;
memset(&req, 0, sizeof req);
- offset = cma_user_data_offset(id_priv->id.ps);
+ offset = cma_user_data_offset(id_priv);
req.private_data_len = offset + conn_param->private_data_len;
if (req.private_data_len < conn_param->private_data_len)
return -EINVAL;
- private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
- if (!private_data)
- return -ENOMEM;
+ if (req.private_data_len) {
+ private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
+ if (!private_data)
+ return -ENOMEM;
+ } else {
+ private_data = NULL;
+ }
if (conn_param->private_data && conn_param->private_data_len)
memcpy(private_data + offset, conn_param->private_data,
@@ -3002,17 +3371,18 @@
id_priv->cm_id.ib = id;
route = &id_priv->id.route;
- ret = cma_format_hdr(private_data, id_priv->id.ps, route);
- if (ret)
- goto out;
- req.private_data = private_data;
+ if (private_data) {
+ ret = cma_format_hdr(private_data, id_priv);
+ if (ret)
+ goto out;
+ req.private_data = private_data;
+ }
req.primary_path = &route->path_rec[0];
if (route->num_paths == 2)
req.alternate_path = &route->path_rec[1];
- req.service_id = cma_get_service_id(id_priv->id.ps,
- (struct sockaddr *) &route->addr.dst_addr);
+ req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
req.qp_num = id_priv->qp_num;
req.qp_type = id_priv->id.qp_type;
req.starting_psn = id_priv->seq_num;
@@ -3021,8 +3391,8 @@
req.flow_control = conn_param->flow_control;
req.retry_count = min_t(u8, 7, conn_param->retry_count);
req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
- req.remote_cm_response_timeout = cma_response_timeout;
- req.local_cm_response_timeout = cma_response_timeout;
+ req.remote_cm_response_timeout = cma_response_timeout;
+ req.local_cm_response_timeout = cma_response_timeout;
req.max_cm_retries = CMA_MAX_CM_RETRIES;
req.srq = id_priv->srq ? 1 : 0;
@@ -3042,32 +3412,30 @@
struct rdma_conn_param *conn_param)
{
struct iw_cm_id *cm_id;
- struct sockaddr_in* sin;
int ret;
struct iw_cm_conn_param iw_param;
cm_id = iw_create_cm_id(id_priv->id.device, id_priv->sock,
- cma_iw_handler, id_priv);
+ cma_iw_handler, id_priv);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
id_priv->cm_id.iw = cm_id;
- sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
- cm_id->local_addr = *sin;
-
- sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
- cm_id->remote_addr = *sin;
+ memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
+ rdma_addr_size(cma_src_addr(id_priv)));
+ memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv),
+ rdma_addr_size(cma_dst_addr(id_priv)));
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
goto out;
if (conn_param) {
- iw_param.ord = conn_param->initiator_depth;
- iw_param.ird = conn_param->responder_resources;
- iw_param.private_data = conn_param->private_data;
- iw_param.private_data_len = conn_param->private_data_len;
+ iw_param.ord = conn_param->initiator_depth;
+ iw_param.ird = conn_param->responder_resources;
+ iw_param.private_data = conn_param->private_data;
+ iw_param.private_data_len = conn_param->private_data_len;
iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
} else {
memset(&iw_param, 0, sizeof iw_param);
@@ -3104,7 +3472,6 @@
ret = cma_connect_ib(id_priv, conn_param);
break;
case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
ret = cma_connect_iw(id_priv, conn_param);
break;
default:
@@ -3146,6 +3513,7 @@
rep.flow_control = conn_param->flow_control;
rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
rep.srq = id_priv->srq ? 1 : 0;
+
cma_dbg(id_priv, "sending REP\n");
ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
out:
@@ -3178,7 +3546,7 @@
}
static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
- enum ib_cm_sidr_status status,
+ enum ib_cm_sidr_status status, u32 qkey,
const void *private_data, int private_data_len)
{
struct ib_cm_sidr_rep_param rep;
@@ -3187,7 +3555,7 @@
memset(&rep, 0, sizeof rep);
rep.status = status;
if (status == IB_SIDR_SUCCESS) {
- ret = cma_set_qkey(id_priv);
+ ret = cma_set_qkey(id_priv, qkey);
if (ret)
return ret;
rep.qp_num = id_priv->qp_num;
@@ -3208,6 +3576,7 @@
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->owner = curthread->td_proc->p_pid;
+
if (!cma_comp(id_priv, RDMA_CM_CONNECT))
return -EINVAL;
@@ -3220,21 +3589,21 @@
case RDMA_TRANSPORT_IB:
if (id->qp_type == IB_QPT_UD) {
if (conn_param)
- ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
- conn_param->private_data,
- conn_param->private_data_len);
+ ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
+ conn_param->qkey,
+ conn_param->private_data,
+ conn_param->private_data_len);
else
ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
- NULL, 0);
+ 0, NULL, 0);
} else {
if (conn_param)
- ret = cma_accept_ib(id_priv, conn_param);
- else
- ret = cma_rep_recv(id_priv);
+ ret = cma_accept_ib(id_priv, conn_param);
+ else
+ ret = cma_rep_recv(id_priv);
}
break;
case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
ret = cma_accept_iw(id_priv, conn_param);
break;
default:
@@ -3287,7 +3656,7 @@
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
if (id->qp_type == IB_QPT_UD)
- ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
+ ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
private_data, private_data_len);
else {
cma_dbg(id_priv, "sending REJ\n");
@@ -3297,7 +3666,6 @@
}
break;
case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
ret = iw_cm_reject(id_priv->cm_id.iw,
private_data, private_data_len);
break;
@@ -3324,14 +3692,12 @@
if (ret)
goto out;
/* Initiate or respond to a disconnect. */
- cma_dbg(id_priv, "sending DREQ\n");
if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) {
cma_dbg(id_priv, "sending DREP\n");
ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
}
break;
case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
break;
default:
@@ -3348,17 +3714,15 @@
struct rdma_id_private *id_priv;
struct cma_multicast *mc = multicast->context;
struct rdma_cm_event event;
- struct rdma_dev_addr *dev_addr;
int ret;
- struct net_device *ndev = NULL;
- u16 vlan;
id_priv = mc->id_priv;
- dev_addr = &id_priv->id.route.addr.dev_addr;
if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) &&
cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
return 0;
+ if (!status)
+ status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
mutex_lock(&id_priv->qp_mutex);
if (!status && id_priv->id.qp)
status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
@@ -3368,32 +3732,16 @@
memset(&event, 0, sizeof event);
event.status = status;
event.param.ud.private_data = mc->context;
- ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
- if (!ndev) {
- status = -ENODEV;
- } else {
- vlan = rdma_vlan_dev_vlan_id(ndev);
- dev_put(ndev);
- }
if (!status) {
event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
ib_init_ah_from_mcmember(id_priv->id.device,
id_priv->id.port_num, &multicast->rec,
&event.param.ud.ah_attr);
- event.param.ud.ah_attr.vlan_id = vlan;
event.param.ud.qp_num = 0xFFFFFF;
event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
- } else {
+ } else
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
- /* mark that the cached record is no longer valid */
- if (status != -ENETRESET && status != -EAGAIN) {
- spin_lock(&id_priv->lock);
- id_priv->is_valid_rec = 0;
- spin_unlock(&id_priv->lock);
- }
- }
-
ret = id_priv->id.event_handler(&id_priv->id, &event);
if (ret) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
@@ -3412,24 +3760,22 @@
unsigned char mc_map[MAX_ADDR_LEN];
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct sockaddr_in *sin = (struct sockaddr_in *) addr;
-#if defined(INET6)
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
-#endif
if (cma_any_addr(addr)) {
memset(mgid, 0, sizeof *mgid);
-#if defined(INET6)
} else if ((addr->sa_family == AF_INET6) &&
((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+ } else if (addr->sa_family == AF_IB) {
+ memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
} else if (addr->sa_family == AF_INET6) {
ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
*mgid = *(union ib_gid *) (mc_map + 4);
-#endif
} else {
ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
@@ -3444,30 +3790,20 @@
struct ib_sa_mcmember_rec rec;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
ib_sa_comp_mask comp_mask;
- int ret = 0;
+ int ret;
- ib_addr_get_mgid(dev_addr, &id_priv->rec.mgid);
+ ib_addr_get_mgid(dev_addr, &rec.mgid);
+ ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
+ &rec.mgid, &rec);
+ if (ret)
+ return ret;
- /* cache ipoib bc record */
- spin_lock(&id_priv->lock);
- if (!id_priv->is_valid_rec)
- ret = ib_sa_get_mcmember_rec(id_priv->id.device,
- id_priv->id.port_num,
- &id_priv->rec.mgid,
- &id_priv->rec);
- if (ret) {
- id_priv->is_valid_rec = 0;
- spin_unlock(&id_priv->lock);
+ ret = cma_set_qkey(id_priv, 0);
+ if (ret)
return ret;
- } else {
- rec = id_priv->rec;
- id_priv->is_valid_rec = 1;
- }
- spin_unlock(&id_priv->lock);
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
- if (id_priv->id.ps == RDMA_PS_UDP)
- rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+ rec.qkey = cpu_to_be32(id_priv->qkey);
rdma_addr_get_sgid(dev_addr, &rec.port_gid);
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = 1;
@@ -3489,7 +3825,7 @@
id_priv->id.port_num, &rec,
comp_mask, GFP_KERNEL,
cma_ib_mc_handler, mc);
- return PTR_RET(mc->multicast.ib);
+ return PTR_ERR_OR_ZERO(mc->multicast.ib);
}
static void iboe_mcast_work_handler(struct work_struct *work)
@@ -3535,7 +3871,7 @@
{
struct iboe_mcast_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- int err;
+ int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
@@ -3567,13 +3903,30 @@
mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
mc->multicast.ib->rec.hop_limit = 1;
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu);
+ mc->multicast.ib->rec.ifindex = dev_addr->bound_dev_if;
+ mc->multicast.ib->rec.net = &init_net;
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &mc->multicast.ib->rec.port_gid);
+
+ if (addr->sa_family == AF_INET) {
+ mc->multicast.ib->rec.gid_type =
+ id_priv->cma_dev->default_gid_type;
+		if ((mc->multicast.ib->rec.gid_type == IB_GID_TYPE_ROCE_V2) ||
+		    (mc->multicast.ib->rec.gid_type == IB_GID_TYPE_ROCE_V1_5))
+			/* err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, true); */
+			if (!err) {
+				mc->igmp_joined = true;
+				mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+			}
+ } else {
+ mc->multicast.ib->rec.gid_type = IB_GID_TYPE_IB;
+ }
dev_put(ndev);
- if (!mc->multicast.ib->rec.mtu) {
+ if (err || !mc->multicast.ib->rec.mtu) {
err = -EINVAL;
goto out2;
}
- rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
- &mc->multicast.ib->rec.port_gid);
+
work->id = id_priv;
work->mc = mc;
INIT_WORK(&work->work, iboe_mcast_work_handler);
@@ -3605,10 +3958,10 @@
if (!mc)
return -ENOMEM;
- memcpy(&mc->addr, addr, ip_addr_size(addr));
+ memcpy(&mc->addr, addr, rdma_addr_size(addr));
mc->context = context;
mc->id_priv = id_priv;
-
+ mc->igmp_joined = false;
spin_lock(&id_priv->lock);
list_add(&mc->list, &id_priv->mc_list);
spin_unlock(&id_priv->lock);
@@ -3650,7 +4003,7 @@
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irq(&id_priv->lock);
list_for_each_entry(mc, &id_priv->mc_list, list) {
- if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
+ if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
list_del(&mc->list);
spin_unlock_irq(&id_priv->lock);
@@ -3665,6 +4018,23 @@
kfree(mc);
break;
case IB_LINK_LAYER_ETHERNET:
+/*
+ if (mc->igmp_joined) {
+ struct rdma_dev_addr *dev_addr = &id->route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(&init_net,
+ dev_addr->bound_dev_if);
+ if (ndev) {
+ cma_igmp_send(ndev,
+ &mc->multicast.ib->rec.mgid,
+ false);
+ dev_put(ndev);
+ }
+ mc->igmp_joined = false;
+ }
+*/
kref_put(&mc->mcref, release_mc);
break;
default:
@@ -3678,28 +4048,69 @@
}
EXPORT_SYMBOL(rdma_leave_multicast);
-static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
+static int cma_netdev_change(struct net_device *ndev, unsigned long event,
+ struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr;
struct cma_ndev_work *work;
+ enum rdma_link_layer dev_ll;
+ struct net_device *bounded_dev;
+ work_func_t work_func;
dev_addr = &id_priv->id.route.addr.dev_addr;
- if ((dev_addr->bound_dev_if == ndev->if_index) &&
- memcmp(dev_addr->src_dev_addr, IF_LLADDR(ndev), ndev->if_addrlen)) {
- printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
- ndev->if_xname, &id_priv->id);
- work = kzalloc(sizeof *work, GFP_KERNEL);
- if (!work)
- return -ENOMEM;
+ switch (event) {
+/* BONDING related
+ case NETDEV_BONDING_FAILOVER:
+ if (!(ndev->flags & IFF_MASTER) ||
+ !(ndev->priv_flags & IFF_BONDING))
+ return 0;
+ if (dev_addr->bound_dev_if != ndev->ifindex)
+ return 0;
+ if (!memcmp(dev_addr->src_dev_addr,
+ ndev->dev_addr, ndev->addr_len))
+ return 0;
+ work_func = cma_ndev_addr_change_work_handler;
+ pr_info("RDMA CM addr change for %s used by id %p\n",
+ ndev->name, &id_priv->id);
+ break;
+*/
+ case NETDEV_UNREGISTER:
+ dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
+ IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
+ if (dev_ll != IB_LINK_LAYER_ETHERNET)
+ return 0;
- INIT_WORK(&work->work, cma_ndev_work_handler);
- work->id = id_priv;
- work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
- atomic_inc(&id_priv->refcount);
- queue_work(cma_wq, &work->work);
+ if (dev_addr->bound_dev_if == ndev->if_index) {
+ work_func = cma_ndev_device_remove_work_handler;
+ break;
+ }
+
+ bounded_dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (!bounded_dev)
+ return 0;
+
+/* BONDING
+ if (!netdev_has_upper_dev(ndev, bounded_dev))
+ return 0;
+*/
+		dev_put(bounded_dev);
+		work_func = cma_ndev_device_remove_work_handler;
+ break;
+
+ default:
+ return 0;
}
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ INIT_WORK(&work->work, work_func);
+ work->id = id_priv;
+ atomic_inc(&id_priv->refcount);
+ queue_work(cma_wq, &work->work);
+
return 0;
}
@@ -3711,24 +4122,21 @@
struct rdma_id_private *id_priv;
int ret = NOTIFY_DONE;
-/* BONDING related, commented out until the bonding is resolved */
-#if 0
+/* BONDING related
if (dev_net(ndev) != &init_net)
return NOTIFY_DONE;
- if (event != NETDEV_BONDING_FAILOVER)
+ if (event != NETDEV_BONDING_FAILOVER &&
+ event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
-
- if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
- return NOTIFY_DONE;
-#endif
+*/
if (event != NETDEV_DOWN && event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list)
list_for_each_entry(id_priv, &cma_dev->id_list, list) {
- ret = cma_netdev_change(ndev, id_priv);
+ ret = cma_netdev_change(ndev, event, id_priv);
if (ret)
goto out;
}
@@ -3742,6 +4150,32 @@
.notifier_call = cma_netdev_callback
};
+static int
+sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS)
+{
+ struct cma_device *cma_dev = arg1;
+ char buf[64];
+ int error;
+
+ strlcpy(buf, roce_gid_cache_type_str(
+ cma_get_default_gid_type(cma_dev)), sizeof(buf));
+
+ error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+ if (error != 0 || req->newptr == NULL)
+ goto done;
+
+ error = roce_gid_cache_parse_gid_str(buf);
+ if (error < 0) {
+ error = EINVAL;
+ goto done;
+ }
+
+ cma_set_default_gid_type(cma_dev, error);
+ error = 0;
+done:
+ return (error);
+}
+
static void cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
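The handler above exposes the per-device default RoCE mode as a string sysctl; reads go through roce_gid_cache_type_str() and writes are validated by roce_gid_cache_parse_gid_str(). A userland sketch of poking it with sysctlbyname(3); the OID string and the mode string are placeholders, since the real node hangs off the device's ports_parent sysctl tree:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	/* Placeholder OID; the actual path depends on the HCA's sysctl tree. */
	const char *oid = "sys.class.infiniband.mlx4_0.default_roce_mode";
	char buf[64];
	size_t len = sizeof(buf);

	if (sysctlbyname(oid, buf, &len, NULL, 0) == 0)
		printf("default RoCE mode: %s\n", buf);
	else
		perror("read");

	/* Placeholder mode string; accepted values come from the gid cache. */
	const char *newmode = "RoCE v2";
	if (sysctlbyname(oid, NULL, NULL, newmode, strlen(newmode) + 1) != 0)
		perror("write");
	return (0);
}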
@@ -3751,7 +4185,10 @@
if (!cma_dev)
return;
+ sysctl_ctx_init(&cma_dev->sysctl_ctx);
+
cma_dev->device = device;
+ cma_dev->default_gid_type = IB_GID_TYPE_IB;
init_completion(&cma_dev->comp);
atomic_set(&cma_dev->refcount, 1);
@@ -3763,32 +4200,12 @@
list_for_each_entry(id_priv, &listen_any_list, list)
cma_listen_on_dev(id_priv, cma_dev);
mutex_unlock(&lock);
-}
-
-static int cma_remove_id_dev(struct rdma_id_private *id_priv)
-{
- struct rdma_cm_event event;
- enum rdma_cm_state state;
- int ret = 0;
-
- /* Record that we want to remove the device */
- state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
- if (state == RDMA_CM_DESTROYING)
- return 0;
-
- cma_cancel_operation(id_priv, state);
- mutex_lock(&id_priv->handler_mutex);
- /* Check for destruction from another callback. */
- if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
- goto out;
-
- memset(&event, 0, sizeof event);
- event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
-out:
- mutex_unlock(&id_priv->handler_mutex);
- return ret;
+ (void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx,
+ SYSCTL_CHILDREN(device->ports_parent->parent->oidp),
+ OID_AUTO, "default_roce_mode",
+ CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+ cma_dev, 0, &sysctl_cma_default_roce_mode, "A", "Default ROCE mode");
}
static void cma_process_remove(struct cma_device *cma_dev)
@@ -3832,21 +4249,18 @@
mutex_unlock(&lock);
cma_process_remove(cma_dev);
+ sysctl_ctx_free(&cma_dev->sysctl_ctx);
kfree(cma_dev);
}
static int __init cma_init(void)
{
- int ret = -ENOMEM;
+ int ret;
cma_wq = create_singlethread_workqueue("rdma_cm");
if (!cma_wq)
return -ENOMEM;
- cma_free_wq = create_singlethread_workqueue("rdma_cm_fr");
- if (!cma_free_wq)
- goto err1;
-
ib_sa_register_client(&sa_client);
rdma_addr_register_client(&addr_client);
register_netdevice_notifier(&cma_nb);
@@ -3861,9 +4275,6 @@
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
-
- destroy_workqueue(cma_free_wq);
-err1:
destroy_workqueue(cma_wq);
return ret;
}
@@ -3874,8 +4285,6 @@
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
- flush_workqueue(cma_free_wq);
- destroy_workqueue(cma_free_wq);
destroy_workqueue(cma_wq);
idr_destroy(&sdp_ps);
idr_destroy(&tcp_ps);
Index: sys/ofed/drivers/infiniband/core/core_priv.h
===================================================================
--- sys/ofed/drivers/infiniband/core/core_priv.h
+++ sys/ofed/drivers/infiniband/core/core_priv.h
@@ -35,12 +35,25 @@
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>
+struct cma_device;
+typedef bool (*cma_device_filter)(struct ib_device *, void *);
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+ void *cookie);
+enum ib_gid_type cma_get_default_gid_type(struct cma_device *cma_dev);
+void cma_set_default_gid_type(struct cma_device *cma_dev,
+ enum ib_gid_type default_gid_type);
+void cma_ref_dev(struct cma_device *cma_dev);
+void cma_deref_dev(struct cma_device *cma_dev);
+
+extern struct workqueue_struct *roce_gid_mgmt_wq;
+
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
- u8, struct kobject *));
+ u8, struct kobject *));
void ib_device_unregister_sysfs(struct ib_device *device);
int ib_sysfs_setup(void);
@@ -49,4 +62,82 @@
int ib_cache_setup(void);
void ib_cache_cleanup(void);
+int ib_resolve_eth_dmac(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask);
+
+typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
+ struct net_device *idev, void *cookie);
+
+typedef int (*roce_netdev_filter)(struct ib_device *device, u8 port,
+ struct net_device *idev, void *cookie);
+
+struct roce_netdev_list {
+ struct list_head list;
+ struct net_device *ndev;
+};
+
+void ib_dev_roce_ports_of_netdev(struct ib_device *ib_dev,
+ roce_netdev_filter filter,
+ void *filter_cookie,
+ roce_netdev_callback cb,
+ void *cookie);
+void ib_enum_roce_ports_of_netdev(roce_netdev_filter filter,
+ void *filter_cookie,
+ roce_netdev_callback cb,
+ void *cookie);
+
+const char *roce_gid_cache_type_str(enum ib_gid_type gid_type);
+int roce_gid_cache_parse_gid_str(const char *buf);
+
+int roce_gid_cache_get_gid(struct ib_device *ib_dev, u8 port, int index,
+ union ib_gid *gid, struct ib_gid_attr *attr);
+
+int roce_gid_cache_find_gid(struct ib_device *ib_dev, union ib_gid *gid,
+ enum ib_gid_type gid_type, struct net *net,
+ int if_index, u8 *port, u16 *index);
+
+int roce_gid_cache_find_gid_by_port(struct ib_device *ib_dev, union ib_gid *gid,
+ enum ib_gid_type gid_type, u8 port,
+ struct net *net, int if_index, u16 *index);
+
+int roce_gid_cache_find_gid_by_filter(struct ib_device *ib_dev,
+ union ib_gid *gid,
+ u8 port,
+ bool (*filter)(const union ib_gid *gid,
+ const struct ib_gid_attr *,
+ void *),
+ void *context,
+ u16 *index);
+
+int roce_gid_cache_is_active(struct ib_device *ib_dev, u8 port);
+
+enum roce_gid_cache_default_mode {
+ ROCE_GID_CACHE_DEFAULT_MODE_SET,
+ ROCE_GID_CACHE_DEFAULT_MODE_DELETE
+};
+
+int roce_gid_cache_set_default_gid(struct ib_device *ib_dev, u8 port,
+ struct net_device *ndev,
+ unsigned long gid_type_mask,
+ enum roce_gid_cache_default_mode mode);
+
+int roce_gid_cache_setup(void);
+void roce_gid_cache_cleanup(void);
+
+int roce_add_gid(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr);
+
+int roce_del_gid(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr);
+
+int roce_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+ struct net_device *ndev);
+
+int roce_gid_mgmt_init(void);
+void roce_gid_mgmt_cleanup(void);
+
+int roce_rescan_device(struct ib_device *ib_dev);
+int roce_sync_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+ struct list_head *list);
+
#endif /* _CORE_PRIV_H */
Index: sys/ofed/drivers/infiniband/core/device.c
===================================================================
--- sys/ofed/drivers/infiniband/core/device.c
+++ sys/ofed/drivers/infiniband/core/device.c
@@ -32,17 +32,18 @@
*/
#include <linux/module.h>
+#include <linux/rcupdate.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mutex.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
#include "core_priv.h"
-MODULE_AUTHOR("Roland Dreier");
-MODULE_DESCRIPTION("core kernel InfiniBand API");
-MODULE_LICENSE("Dual BSD/GPL");
+/* core kernel InfiniBand API */
struct ib_client_data {
struct list_head list;
@@ -173,14 +174,9 @@
*/
struct ib_device *ib_alloc_device(size_t size)
{
- struct ib_device *dev;
-
BUG_ON(size < sizeof (struct ib_device));
- dev = kzalloc(size, GFP_KERNEL);
- spin_lock_init(&dev->cmd_perf_lock);
-
- return dev;
+ return kzalloc(size, GFP_KERNEL);
}
EXPORT_SYMBOL(ib_alloc_device);
@@ -264,6 +260,39 @@
return ret;
}
+static void ib_device_complete_cb(struct kref *kref)
+{
+ struct ib_device *device = container_of(kref, struct ib_device,
+ refcount);
+
+ if (device->reg_state >= IB_DEV_UNREGISTERING)
+ complete(&device->free);
+}
+
+/**
+ * ib_device_hold - increase the reference count of device
+ * @device: ib device to prevent from being free'd
+ *
+ * Prevent the device from being free'd.
+ */
+void ib_device_hold(struct ib_device *device)
+{
+ kref_get(&device->refcount);
+}
+EXPORT_SYMBOL(ib_device_hold);
+
+/**
+ * ib_device_put - decrease the reference count of device
+ * @device: ib device to drop a reference on
+ *
+ * Puts the ib_device and allows it to be free'd once the last
+ * reference is dropped.
+ */
+int ib_device_put(struct ib_device *device)
+{
+ return kref_put(&device->refcount, ib_device_complete_cb);
+}
+EXPORT_SYMBOL(ib_device_put);
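+
+/*
+ * A minimal usage sketch (my_wq and my_work are illustrative names only):
+ * a caller that defers work against the device pairs the two calls, e.g.
+ *
+ *	ib_device_hold(ib_dev);
+ *	queue_work(my_wq, &my_work);
+ *	...
+ *	// in the work handler, once ib_dev is no longer needed:
+ *	ib_device_put(ib_dev);
+ *
+ * The final put of an unregistering device completes device->free and
+ * lets ib_unregister_device() proceed.
+ */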
+
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
@@ -315,6 +344,9 @@
list_add_tail(&device->core_list, &device_list);
+ kref_init(&device->refcount);
+ init_completion(&device->free);
+
device->reg_state = IB_DEV_REGISTERED;
{
@@ -345,6 +377,8 @@
mutex_lock(&device_mutex);
+ device->reg_state = IB_DEV_UNREGISTERING;
+
list_for_each_entry_reverse(client, &client_list, list)
if (client->remove)
client->remove(device);
@@ -358,6 +392,9 @@
ib_device_unregister_sysfs(device);
+ ib_device_put(device);
+ wait_for_completion(&device->free);
+
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
kfree(context);
@@ -581,6 +618,7 @@
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
+ memset(port_attr, 0, sizeof(*port_attr));
return device->query_port(device, port_num, port_attr);
}
EXPORT_SYMBOL(ib_query_port);
@@ -591,17 +629,104 @@
* @port_num:Port number to query
* @index:GID table index to query
* @gid:Returned GID
+ * @attr: Returned GID's attribute (only in RoCE)
*
* ib_query_gid() fetches the specified GID table entry.
*/
int ib_query_gid(struct ib_device *device,
- u8 port_num, int index, union ib_gid *gid)
+ u8 port_num, int index, union ib_gid *gid,
+ struct ib_gid_attr *attr)
{
+ if (!ib_cache_use_roce_gid_cache(device, port_num))
+ return roce_gid_cache_get_gid(device, port_num, index, gid,
+ attr);
+
+ if (attr)
+ return -EINVAL;
+
return device->query_gid(device, port_num, index, gid);
}
EXPORT_SYMBOL(ib_query_gid);
/**
+ * ib_dev_roce_ports_of_netdev - enumerate RoCE ports of an ibdev
+ * with respect to a netdev
+ * @ib_dev : IB device we want to query
+ * @filter: Should we call the callback?
+ * @filter_cookie: Cookie passed to filter
+ * @cb: Callback to call for each RoCE port found
+ * @cookie: Cookie passed back to the callback
+ *
+ * Enumerates all of the physical RoCE ports of ib_dev which are
+ * relaying Ethernet packets to a specific (possibly virtual)
+ * netdevice according to filter.
+ */
+void ib_dev_roce_ports_of_netdev(struct ib_device *ib_dev,
+ roce_netdev_filter filter,
+ void *filter_cookie,
+ roce_netdev_callback cb,
+ void *cookie)
+{
+ u8 port;
+
+ if (ib_dev->modify_gid)
+ for (port = start_port(ib_dev); port <= end_port(ib_dev);
+ port++)
+ if (ib_dev->get_link_layer(ib_dev, port) ==
+ IB_LINK_LAYER_ETHERNET) {
+ struct net_device *idev = NULL;
+
+ rcu_read_lock();
+ if (ib_dev->get_netdev)
+ idev = ib_dev->get_netdev(ib_dev, port);
+
+ /*
+ if (idev &&
+ idev->reg_state >= NETREG_UNREGISTERED)
+ idev = NULL;
+ */
+
+ if (idev)
+ dev_hold(idev);
+
+ rcu_read_unlock();
+
+ if (filter(ib_dev, port, idev, filter_cookie))
+ cb(ib_dev, port, idev, cookie);
+
+ if (idev)
+ dev_put(idev);
+ }
+}
+
+/**
+ * ib_enum_roce_ports_of_netdev - enumerate RoCE ports of a netdev
+ * @filter: Should we call the callback?
+ * @filter_cookie: Cookie passed to filter
+ * @cb: Callback to call for each RoCE port found
+ * @cookie: Cookie passed back to the callback
+ *
+ * Enumerates all of the physical RoCE ports which are relaying
+ * Ethernet packets to a specific (possibly virtual) netdevice
+ * according to filter.
+ */
+void ib_enum_roce_ports_of_netdev(roce_netdev_filter filter,
+ void *filter_cookie,
+ roce_netdev_callback cb,
+ void *cookie)
+{
+ struct ib_device *dev;
+
+ mutex_lock(&device_mutex);
+
+ list_for_each_entry(dev, &device_list, core_list)
+ ib_dev_roce_ports_of_netdev(dev, filter, filter_cookie, cb,
+ cookie);
+
+ mutex_unlock(&device_mutex);
+}
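+
+/*
+ * A minimal sketch of a filter/callback pair (my_filter and my_cb are
+ * illustrative names, not helpers provided by this change): a client that
+ * only cares about ports currently bound to a given netdev could pass
+ *
+ *	static int my_filter(struct ib_device *device, u8 port,
+ *			     struct net_device *idev, void *cookie)
+ *	{
+ *		return idev == cookie;
+ *	}
+ *
+ *	static void my_cb(struct ib_device *device, u8 port,
+ *			  struct net_device *idev, void *cookie)
+ *	{
+ *		// e.g. refresh the GIDs of (device, port)
+ *	}
+ *
+ *	ib_enum_roce_ports_of_netdev(my_filter, ndev, my_cb, ndev);
+ */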
+
+/**
* ib_query_pkey - Get P_Key table entry
* @device:Device to query
* @port_num:Port number to query
@@ -669,19 +794,32 @@
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
+ * @gid_type: Type of GID.
+ * @net: The namespace to search this GID in (RoCE only).
+ * Valid only if if_index != 0.
+ * @if_index: The if_index assigned with this GID (RoCE only).
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- u8 *port_num, u16 *index)
+ enum ib_gid_type gid_type, struct net *net,
+ int if_index, u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
+ if (device->cache.roce_gid_cache &&
+ !roce_gid_cache_find_gid(device, gid, gid_type, net, if_index,
+ port_num, index))
+ return 0;
+
for (port = start_port(device); port <= end_port(device); ++port) {
+ if (!ib_cache_use_roce_gid_cache(device, port))
+ continue;
+
for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) {
- ret = ib_query_gid(device, port, i, &tmp_gid);
+ ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
if (ret)
return ret;
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
@@ -750,6 +888,8 @@
goto err;
}
+ roce_gid_cache_setup();
+
ret = ib_cache_setup();
if (ret) {
printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
@@ -768,6 +908,7 @@
static void __exit ib_core_cleanup(void)
{
+ roce_gid_cache_cleanup();
ib_cache_cleanup();
ib_sysfs_cleanup();
/* Make sure that any pending umem accounting work is done. */
Index: sys/ofed/drivers/infiniband/core/iwcm.c
===================================================================
--- sys/ofed/drivers/infiniband/core/iwcm.c
+++ sys/ofed/drivers/infiniband/core/iwcm.c
@@ -59,9 +59,7 @@
#include "iwcm.h"
-MODULE_AUTHOR("Tom Tucker");
-MODULE_DESCRIPTION("iWARP CM");
-MODULE_LICENSE("Dual BSD/GPL");
+/* iWARP CM */
static struct workqueue_struct *iwcm_wq;
struct iwcm_work {
Index: sys/ofed/drivers/infiniband/core/mad.c
===================================================================
--- sys/ofed/drivers/infiniband/core/mad.c
+++ sys/ofed/drivers/infiniband/core/mad.c
@@ -47,10 +47,7 @@
#include "smi.h"
#include "agent.h"
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("kernel IB MAD API");
-MODULE_AUTHOR("Hal Rosenstock");
-MODULE_AUTHOR("Sean Hefty");
+/* kernel IB MAD API */
static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
@@ -65,7 +62,6 @@
static struct list_head ib_mad_port_list;
static u32 ib_mad_client_id = 0;
-
/*
* Timeout FIFO (tf) param
*/
@@ -106,7 +102,6 @@
static int send_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr,
u32 timeout_ms, u32 retries_left);
-
/*
* Timeout FIFO functions - implements FIFO with timeout mechanism
*/
@@ -223,7 +218,7 @@
INIT_LIST_HEAD(&tf->fifo_head);
init_timer(&tf->timer);
INIT_WORK(&tf->work, timeout_handler_task);
- tf->timer.data = (unsigned long) tf;
+ tf->timer.data = (unsigned long)tf;
tf->timer.function = activate_timeout_handler_task;
tf->timer.expires = jiffies;
tf->fifo_size = fifo_size;
@@ -295,7 +290,6 @@
unsigned long flags;
unsigned long time_left;
struct tf_entry *tmp, *tmp1;
- bool found = false;
spin_lock_irqsave(&tf->lists_lock, flags);
if (list_empty(&tf->fifo_head)) {
@@ -304,13 +298,11 @@
}
list_for_each_entry(tmp, &tf->fifo_head, fifo_list) {
- if (!tmp->canceled) {
- found = true;
+ if (!tmp->canceled)
break;
- }
}
- if (!found) {
+ if (tmp->canceled) {
spin_unlock_irqrestore(&tf->lists_lock, flags);
return NULL;
}
@@ -330,7 +322,7 @@
spin_unlock_irqrestore(&tf->lists_lock, flags);
time_left = tmp->exp_time - jiffies;
- if ((long) time_left <= 0)
+ if ((long)time_left <= 0)
time_left = 0;
*time_left_ms = jiffies_to_msecs(time_left);
@@ -1056,7 +1048,7 @@
*/
cancel_mads(mad_agent_priv);
port_priv = mad_agent_priv->qp_info->port_priv;
- cancel_delayed_work_sync(&mad_agent_priv->timed_work);
+ cancel_delayed_work(&mad_agent_priv->timed_work);
spin_lock_irqsave(&port_priv->reg_lock, flags);
remove_mad_reg_req(mad_agent_priv);
@@ -1100,18 +1092,18 @@
struct ib_mad_snoop_private *mad_snoop_priv;
if (!IS_ERR(mad_agent)) {
- /* If the TID is zero, the agent can only snoop. */
- if (mad_agent->hi_tid) {
- mad_agent_priv = container_of(mad_agent,
+ /* If the TID is zero, the agent can only snoop. */
+ if (mad_agent->hi_tid) {
+ mad_agent_priv = container_of(mad_agent,
struct ib_mad_agent_private,
agent);
- unregister_mad_agent(mad_agent_priv);
- } else {
- mad_snoop_priv = container_of(mad_agent,
+ unregister_mad_agent(mad_agent_priv);
+ } else {
+ mad_snoop_priv = container_of(mad_agent,
struct ib_mad_snoop_private,
agent);
- unregister_mad_snoop(mad_snoop_priv);
- }
+ unregister_mad_snoop(mad_snoop_priv);
+ }
}
return 0;
@@ -1238,7 +1230,7 @@
if (smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
IB_SMI_DISCARD) {
ret = -EINVAL;
- printk(KERN_ERR PFX "Invalid directed route\n");
+ dev_err(&device->dev, "Invalid directed route\n");
goto out;
}
@@ -1250,7 +1242,7 @@
local = kmalloc(sizeof *local, GFP_ATOMIC);
if (!local) {
ret = -ENOMEM;
- printk(KERN_ERR PFX "No memory for ib_mad_local_private\n");
+ dev_err(&device->dev, "No memory for ib_mad_local_private\n");
goto out;
}
local->mad_priv = NULL;
@@ -1258,7 +1250,7 @@
mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
if (!mad_priv) {
ret = -ENOMEM;
- printk(KERN_ERR PFX "No memory for local response MAD\n");
+ dev_err(&device->dev, "No memory for local response MAD\n");
kfree(local);
goto out;
}
@@ -1369,9 +1361,9 @@
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
if (!seg) {
- printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
- "alloc failed for len %zd, gfp %#x\n",
- sizeof (*seg) + seg_size, gfp_mask);
+ dev_err(&send_buf->mad_agent->device->dev,
+ "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
+ sizeof (*seg) + seg_size, gfp_mask);
free_send_rmpp_list(send_wr);
return -ENOMEM;
}
@@ -1557,17 +1549,18 @@
if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
return -ENOMEM;
+ mad_send_wr->header_mapping = sge[0].addr;
+
sge[1].addr = ib_dma_map_single(mad_agent->device,
ib_get_payload(mad_send_wr),
sge[1].length,
DMA_TO_DEVICE);
-
if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
- ret = -ENOMEM;
- goto dma1_err;
+ ib_dma_unmap_single(mad_agent->device,
+ mad_send_wr->header_mapping,
+ sge[0].length, DMA_TO_DEVICE);
+ return -ENOMEM;
}
-
- mad_send_wr->header_mapping = sge[0].addr;
mad_send_wr->payload_mapping = sge[1].addr;
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
@@ -1585,17 +1578,14 @@
list_add_tail(&mad_send_wr->mad_list.list, list);
}
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
-
- if (!ret)
- return 0;
-
+ if (ret) {
ib_dma_unmap_single(mad_agent->device,
mad_send_wr->header_mapping,
- sge[1].length, DMA_TO_DEVICE);
-dma1_err:
+ sge[0].length, DMA_TO_DEVICE);
ib_dma_unmap_single(mad_agent->device,
mad_send_wr->payload_mapping,
- sge[0].length, DMA_TO_DEVICE);
+ sge[1].length, DMA_TO_DEVICE);
+ }
return ret;
}
@@ -1617,6 +1607,11 @@
/* Reference MAD agent until send completes */
atomic_inc(&mad_agent_priv->refcount);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ if (mad_agent_priv->send_list_closed) {
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ deref_mad_agent(mad_agent_priv);
+ return -EIO;
+ }
list_add_tail(&mad_send_wr->agent_list,
&mad_agent_priv->send_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
@@ -1627,7 +1622,7 @@
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_del(&mad_send_wr->agent_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
}
return ret;
@@ -1702,29 +1697,29 @@
if (ret < 0)
goto error;
} else {
- /* Reference MAD agent until send completes */
- atomic_inc(&mad_agent_priv->refcount);
- spin_lock_irqsave(&mad_agent_priv->lock, flags);
- list_add_tail(&mad_send_wr->agent_list,
- &mad_agent_priv->send_list);
- spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
-
- if (mad_agent_priv->agent.rmpp_version) {
- ret = ib_send_rmpp_mad(mad_send_wr);
- if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
- ret = ib_send_mad(mad_send_wr);
- } else
- ret = ib_send_mad(mad_send_wr);
- if (ret < 0) {
- /* Fail send request */
+ /* Reference MAD agent until send completes */
+ atomic_inc(&mad_agent_priv->refcount);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- list_del(&mad_send_wr->agent_list);
+ list_add_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->send_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- atomic_dec(&mad_agent_priv->refcount);
- goto error;
+
+ if (mad_agent_priv->agent.rmpp_version) {
+ ret = ib_send_rmpp_mad(mad_send_wr);
+ if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
+ ret = ib_send_mad(mad_send_wr);
+ } else
+ ret = ib_send_mad(mad_send_wr);
+ if (ret < 0) {
+ /* Fail send request */
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_del(&mad_send_wr->agent_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ atomic_dec(&mad_agent_priv->refcount);
+ goto error;
+ }
}
}
- }
return 0;
error:
if (bad_send_buf)
@@ -1774,7 +1769,8 @@
int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
struct ib_wc *wc)
{
- printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n");
+ dev_err(&mad_agent->device->dev,
+ "ib_process_mad_wc() not implemented yet\n");
return 0;
}
EXPORT_SYMBOL(ib_process_mad_wc);
@@ -1786,7 +1782,7 @@
for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
if ((*method)->agent[i]) {
- printk(KERN_ERR PFX "Method %d already in use\n", i);
+ pr_err("Method %d already in use\n", i);
return -EINVAL;
}
}
@@ -1798,8 +1794,7 @@
/* Allocate management method table */
*method = kzalloc(sizeof **method, GFP_ATOMIC);
if (!*method) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_method_table\n");
+ pr_err("No memory for ib_mad_mgmt_method_table\n");
return -ENOMEM;
}
@@ -1894,8 +1889,8 @@
/* Allocate management class table for "new" class version */
*class = kzalloc(sizeof **class, GFP_ATOMIC);
if (!*class) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_class_table\n");
+ dev_err(&agent_priv->agent.device->dev,
+ "No memory for ib_mad_mgmt_class_table\n");
ret = -ENOMEM;
goto error1;
}
@@ -1961,8 +1956,8 @@
/* Allocate mgmt vendor class table for "new" class version */
vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
if (!vendor) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_vendor_class_table\n");
+ dev_err(&agent_priv->agent.device->dev,
+ "No memory for ib_mad_mgmt_vendor_class_table\n");
goto error1;
}
@@ -1972,8 +1967,8 @@
/* Allocate table for this management vendor class */
vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
if (!vendor_class) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_mgmt_vendor_class\n");
+ dev_err(&agent_priv->agent.device->dev,
+ "No memory for ib_mad_mgmt_vendor_class\n");
goto error2;
}
@@ -2004,7 +1999,7 @@
goto check_in_use;
}
}
- printk(KERN_ERR PFX "All OUI slots in use\n");
+ dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
goto error3;
check_in_use:
@@ -2215,9 +2210,9 @@
if (mad_agent->agent.recv_handler)
atomic_inc(&mad_agent->refcount);
else {
- printk(KERN_NOTICE PFX "No receive handler for client "
- "%p on port %d\n",
- &mad_agent->agent, port_priv->port_num);
+ dev_info(&port_priv->device->dev,
+ "No receive handler for client %p on port %d\n",
+ &mad_agent->agent, port_priv->port_num);
mad_agent = NULL;
}
}
@@ -2233,8 +2228,8 @@
/* Make sure MAD base version is understood */
if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
- printk(KERN_ERR PFX "MAD received with unsupported base "
- "version %d\n", mad->mad_hdr.base_version);
+ pr_err("MAD received with unsupported base version %d\n",
+ mad->mad_hdr.base_version);
goto out;
}
@@ -2309,7 +2304,7 @@
((1 << lmc) - 1)));
} else {
if (ib_get_cached_gid(device, port_num,
- attr.grh.sgid_index, &sgid))
+ attr.grh.sgid_index, &sgid, NULL))
return 0;
return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
16);
@@ -2486,8 +2481,8 @@
response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
if (!response) {
- printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
- "for response buffer\n");
+ dev_err(&port_priv->device->dev,
+ "ib_mad_recv_done_handler no memory for response buffer\n");
goto out;
}
@@ -2754,7 +2749,8 @@
ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
&bad_send_wr);
if (ret) {
- printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
+ dev_err(&port_priv->device->dev,
+ "ib_post_send failed: %d\n", ret);
mad_send_wr = queued_send_wr;
wc->status = IB_WC_LOC_QP_OP_ERR;
goto retry;
@@ -2826,8 +2822,9 @@
IB_QP_STATE | IB_QP_CUR_STATE);
kfree(attr);
if (ret)
- printk(KERN_ERR PFX "mad_error_handler - "
- "ib_modify_qp to RTS : %d\n", ret);
+ dev_err(&port_priv->device->dev,
+ "mad_error_handler - ib_modify_qp to RTS : %d\n",
+ ret);
else
mark_sends_for_retry(qp_info);
}
@@ -2835,6 +2832,10 @@
}
}
+enum {
+ IB_MAD_COMP_HANDLER_QUOTA = 100
+};
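+
+/*
+ * The completion handler below polls at most IB_MAD_COMP_HANDLER_QUOTA
+ * completions per invocation; when the quota is exhausted it requeues
+ * itself and returns without re-arming the CQ, presumably to bound how
+ * long a single work item can run.  The CQ is only re-armed once polling
+ * drains it.
+ */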
+
/*
* IB MAD completion callback
*/
@@ -2842,10 +2843,9 @@
{
struct ib_mad_port_private *port_priv;
struct ib_wc wc;
+ int quota = IB_MAD_COMP_HANDLER_QUOTA;
port_priv = container_of(work, struct ib_mad_port_private, work);
- ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
-
while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
if (wc.status == IB_WC_SUCCESS) {
switch (wc.opcode) {
@@ -2861,7 +2861,13 @@
}
} else
mad_error_handler(port_priv, &wc);
+ if (!quota--) {
+ if (!queue_work(port_priv->wq, &port_priv->work))
+ pr_warn("%s-%d: fatal: failed to requeue work\n", __func__, __LINE__);
+ return;
+ }
}
+ ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
}
static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
@@ -2875,6 +2881,7 @@
cancel_sa_cc_mads(mad_agent_priv);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ mad_agent_priv->send_list_closed = 1;
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
&mad_agent_priv->send_list, agent_list) {
if (mad_send_wr->status == IB_WC_SUCCESS) {
@@ -2995,7 +3002,8 @@
if (local->mad_priv) {
recv_mad_agent = local->recv_mad_agent;
if (!recv_mad_agent) {
- printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
+ dev_err(&mad_agent_priv->agent.device->dev,
+ "No receive MAD agent for local completion\n");
free_mad = 1;
goto local_send_completion;
}
@@ -3178,7 +3186,8 @@
} else {
mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
if (!mad_priv) {
- printk(KERN_ERR PFX "No memory for receive buffer\n");
+ dev_err(&qp_info->port_priv->device->dev,
+ "No memory for receive buffer\n");
ret = -ENOMEM;
break;
}
@@ -3191,11 +3200,8 @@
if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
sg_list.addr))) {
ret = -ENOMEM;
- kmem_cache_free(ib_mad_cache, mad_priv);
- printk(KERN_ERR PFX "ib_dma_map_single failed\n");
break;
}
-
mad_priv->header.mapping = sg_list.addr;
recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
mad_priv->header.mad_list.mad_queue = recv_queue;
@@ -3217,7 +3223,8 @@
sizeof mad_priv->header,
DMA_FROM_DEVICE);
kmem_cache_free(ib_mad_cache, mad_priv);
- printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
+ dev_err(&qp_info->port_priv->device->dev,
+ "ib_post_recv failed: %d\n", ret);
break;
}
} while (post);
@@ -3269,16 +3276,17 @@
int ret, i;
struct ib_qp_attr *attr;
struct ib_qp *qp;
- u16 pkey_index = 0;
+ u16 pkey_index;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
- printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
+ dev_err(&port_priv->device->dev,
+ "Couldn't kmalloc ib_qp_attr\n");
return -ENOMEM;
}
ret = ib_find_pkey(port_priv->device, port_priv->port_num,
- 0xFFFF, &pkey_index);
+ IB_DEFAULT_PKEY_FULL, &pkey_index);
if (ret)
pkey_index = 0;
@@ -3297,16 +3305,18 @@
ret = ib_modify_qp(qp, attr, IB_QP_STATE |
IB_QP_PKEY_INDEX | IB_QP_QKEY);
if (ret) {
- printk(KERN_ERR PFX "Couldn't change QP%d state to "
- "INIT: %d\n", i, ret);
+ dev_err(&port_priv->device->dev,
+ "Couldn't change QP%d state to INIT: %d\n",
+ i, ret);
goto out;
}
attr->qp_state = IB_QPS_RTR;
ret = ib_modify_qp(qp, attr, IB_QP_STATE);
if (ret) {
- printk(KERN_ERR PFX "Couldn't change QP%d state to "
- "RTR: %d\n", i, ret);
+ dev_err(&port_priv->device->dev,
+ "Couldn't change QP%d state to RTR: %d\n",
+ i, ret);
goto out;
}
@@ -3314,16 +3324,18 @@
attr->sq_psn = IB_MAD_SEND_Q_PSN;
ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
if (ret) {
- printk(KERN_ERR PFX "Couldn't change QP%d state to "
- "RTS: %d\n", i, ret);
+ dev_err(&port_priv->device->dev,
+ "Couldn't change QP%d state to RTS: %d\n",
+ i, ret);
goto out;
}
}
ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
if (ret) {
- printk(KERN_ERR PFX "Failed to request completion "
- "notification: %d\n", ret);
+ dev_err(&port_priv->device->dev,
+ "Failed to request completion notification: %d\n",
+ ret);
goto out;
}
@@ -3333,7 +3345,8 @@
ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
if (ret) {
- printk(KERN_ERR PFX "Couldn't post receive WRs\n");
+ dev_err(&port_priv->device->dev,
+ "Couldn't post receive WRs\n");
goto out;
}
}
@@ -3347,7 +3360,8 @@
struct ib_mad_qp_info *qp_info = qp_context;
/* It's worse than that! He's dead, Jim! */
- printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
+ dev_err(&qp_info->port_priv->device->dev,
+ "Fatal error (%d) on MAD QP (%d)\n",
event->event, qp_info->qp->qp_num);
}
@@ -3393,8 +3407,9 @@
qp_init_attr.event_handler = qp_event_handler;
qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
if (IS_ERR(qp_info->qp)) {
- printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
- get_spl_qp_index(qp_type));
+ dev_err(&qp_info->port_priv->device->dev,
+ "Couldn't create ib_mad QP%d\n",
+ get_spl_qp_index(qp_type));
ret = PTR_ERR(qp_info->qp);
goto error;
}
@@ -3432,7 +3447,7 @@
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
- printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
+ dev_err(&device->dev, "No memory for ib_mad_port_private\n");
return -ENOMEM;
}
@@ -3452,21 +3467,21 @@
ib_mad_thread_completion_handler,
NULL, port_priv, cq_size, 0);
if (IS_ERR(port_priv->cq)) {
- printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
+ dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
goto error3;
}
port_priv->pd = ib_alloc_pd(device);
if (IS_ERR(port_priv->pd)) {
- printk(KERN_ERR PFX "Couldn't create ib_mad PD\n");
+ dev_err(&device->dev, "Couldn't create ib_mad PD\n");
ret = PTR_ERR(port_priv->pd);
goto error4;
}
port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(port_priv->mr)) {
- printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n");
+ dev_err(&device->dev, "Couldn't get ib_mad DMA MR\n");
ret = PTR_ERR(port_priv->mr);
goto error5;
}
@@ -3491,14 +3506,13 @@
if (sa_cc_init(&port_priv->sa_cc))
goto error9;
-
spin_lock_irqsave(&ib_mad_port_list_lock, flags);
list_add_tail(&port_priv->port_list, &ib_mad_port_list);
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
ret = ib_mad_port_start(port_priv);
if (ret) {
- printk(KERN_ERR PFX "Couldn't start port\n");
+ dev_err(&device->dev, "Couldn't start port\n");
goto error10;
}
@@ -3509,9 +3523,9 @@
list_del_init(&port_priv->port_list);
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
- destroy_workqueue(port_priv->wq);
-error9:
sa_cc_destroy(&port_priv->sa_cc);
+error9:
+ destroy_workqueue(port_priv->wq);
error8:
destroy_mad_qp(&port_priv->qp_info[1]);
error7:
@@ -3544,7 +3558,7 @@
port_priv = __ib_get_mad_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
- printk(KERN_ERR PFX "Port %d not found\n", port_num);
+ dev_err(&device->dev, "Port %d not found\n", port_num);
return -ENODEV;
}
list_del_init(&port_priv->port_list);
@@ -3583,14 +3597,12 @@
for (i = start; i <= end; i++) {
if (ib_mad_port_open(device, i)) {
- printk(KERN_ERR PFX "Couldn't open %s port %d\n",
- device->name, i);
+ dev_err(&device->dev, "Couldn't open port %d\n", i);
goto error;
}
if (ib_agent_port_open(device, i)) {
- printk(KERN_ERR PFX "Couldn't open %s port %d "
- "for agents\n",
- device->name, i);
+ dev_err(&device->dev,
+ "Couldn't open port %d for agents\n", i);
goto error_agent;
}
}
@@ -3598,20 +3610,17 @@
error_agent:
if (ib_mad_port_close(device, i))
- printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, i);
+ dev_err(&device->dev, "Couldn't close port %d\n", i);
error:
i--;
while (i >= start) {
if (ib_agent_port_close(device, i))
- printk(KERN_ERR PFX "Couldn't close %s port %d "
- "for agents\n",
- device->name, i);
+ dev_err(&device->dev,
+ "Couldn't close port %d for agents\n", i);
if (ib_mad_port_close(device, i))
- printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, i);
+ dev_err(&device->dev, "Couldn't close port %d\n", i);
i--;
}
}
@@ -3632,12 +3641,12 @@
}
for (i = 0; i < num_ports; i++, cur_port++) {
if (ib_agent_port_close(device, cur_port))
- printk(KERN_ERR PFX "Couldn't close %s port %d "
- "for agents\n",
- device->name, cur_port);
+ dev_err(&device->dev,
+ "Couldn't close port %d for agents\n",
+ cur_port);
if (ib_mad_port_close(device, cur_port))
- printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, cur_port);
+ dev_err(&device->dev, "Couldn't close port %d\n",
+ cur_port);
}
}
@@ -3663,7 +3672,7 @@
SLAB_HWCACHE_ALIGN,
NULL);
if (!ib_mad_cache) {
- printk(KERN_ERR PFX "Couldn't create ib_mad cache\n");
+ pr_err("Couldn't create ib_mad cache\n");
ret = -ENOMEM;
goto error1;
}
@@ -3671,7 +3680,7 @@
INIT_LIST_HEAD(&ib_mad_port_list);
if (ib_register_client(&mad_client)) {
- printk(KERN_ERR PFX "Couldn't register ib_mad client\n");
+ pr_err("Couldn't register ib_mad client\n");
ret = -EINVAL;
goto error2;
}
Index: sys/ofed/drivers/infiniband/core/mad_priv.h
===================================================================
--- sys/ofed/drivers/infiniband/core/mad_priv.h
+++ sys/ofed/drivers/infiniband/core/mad_priv.h
@@ -42,14 +42,11 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
-
-#define PFX "ib_mad: "
-
#define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */
/* QP and CQ parameters */
#define IB_MAD_QP_SEND_SIZE 128
-#define IB_MAD_QP_RECV_SIZE 512
+#define IB_MAD_QP_RECV_SIZE 4096
#define IB_MAD_QP_MIN_SIZE 64
#define IB_MAD_QP_MAX_SIZE 8192
#define IB_MAD_SEND_REQ_MAX_SG 2
@@ -110,6 +107,7 @@
atomic_t refcount;
struct completion comp;
+ int send_list_closed;
};
struct ib_mad_snoop_private {
Index: sys/ofed/drivers/infiniband/core/multicast.c
===================================================================
--- sys/ofed/drivers/infiniband/core/multicast.c
+++ sys/ofed/drivers/infiniband/core/multicast.c
@@ -40,23 +40,11 @@
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/random.h>
-#include <linux/moduleparam.h>
#include <linux/rbtree.h>
#include <rdma/ib_cache.h>
#include "sa.h"
-static int mcast_leave_retries = 3;
-
-/*static const struct kernel_param_ops retry_ops = {
- .set = param_set_int,
- .get = param_get_int,
-};
-
-module_param_cb(mcast_leave_retries, &retry_ops, &mcast_leave_retries, 0644);
-MODULE_PARM_DESC(mcast_leave_retries, "Number of retries for multicast leave "
- "requests before giving up (default: 3)");
-*/
static void mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device);
@@ -308,8 +296,8 @@
if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
return -EINVAL;
if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
- IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
- src->mtu, dst->mtu))
+ IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
+ src->mtu, dst->mtu))
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
src->traffic_class != dst->traffic_class)
@@ -317,14 +305,14 @@
if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
return -EINVAL;
if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
- IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
- src->rate, dst->rate))
+ IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
+ src->rate, dst->rate))
return -EINVAL;
if (check_selector(comp_mask,
- IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
- IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
- dst->packet_life_time_selector,
- src->packet_life_time, dst->packet_life_time))
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
+ dst->packet_life_time_selector,
+ src->packet_life_time, dst->packet_life_time))
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
return -EINVAL;
@@ -352,7 +340,7 @@
port->port_num, IB_MGMT_METHOD_SET,
&member->multicast.rec,
member->multicast.comp_mask,
- 3000, GFP_KERNEL, join_handler, group,
+ 1000, 3, GFP_KERNEL, join_handler, group,
&group->query);
if (ret >= 0) {
group->query_id = ret;
@@ -376,7 +364,7 @@
IB_SA_MCMEMBER_REC_MGID |
IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_JOIN_STATE,
- 3000, GFP_KERNEL, leave_handler,
+ 1000, 3, GFP_KERNEL, leave_handler,
group, &group->query);
if (ret >= 0) {
group->query_id = ret;
@@ -540,17 +528,22 @@
if (status)
process_join_error(group, status);
else {
+ int mgids_changed, is_mgid0;
ib_find_pkey(group->port->dev->device, group->port->port_num,
be16_to_cpu(rec->pkey), &pkey_index);
spin_lock_irq(&group->port->lock);
- group->rec = *rec;
if (group->state == MCAST_BUSY &&
group->pkey_index == MCAST_INVALID_PKEY_INDEX)
group->pkey_index = pkey_index;
- if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
+ mgids_changed = memcmp(&rec->mgid, &group->rec.mgid,
+ sizeof(group->rec.mgid));
+ group->rec = *rec;
+ if (mgids_changed) {
rb_erase(&group->node, &group->port->table);
- mcast_insert(group->port, group, 1);
+ is_mgid0 = !memcmp(&mgid0, &group->rec.mgid,
+ sizeof(mgid0));
+ mcast_insert(group->port, group, is_mgid0);
}
spin_unlock_irq(&group->port->lock);
}
@@ -565,12 +558,8 @@
if (status && group->retries > 0 &&
!send_leave(group, group->leave_state))
group->retries--;
- else {
- if (status && group->retries <= 0)
- printk(KERN_WARNING "reached max retry count. "
- "status=%d. Giving up\n", status);
+ else
mcast_work_handler(&group->work);
- }
}
static struct mcast_group *acquire_group(struct mcast_port *port,
@@ -593,7 +582,7 @@
if (!group)
return NULL;
- group->retries = mcast_leave_retries;
+ group->retries = 3;
group->port = port;
group->rec.mgid = *mgid;
group->pkey_index = MCAST_INVALID_PKEY_INDEX;
@@ -743,7 +732,22 @@
u16 gid_index;
u8 p;
- ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
+ switch (rdma_port_get_link_layer(device, port_num)) {
+ case IB_LINK_LAYER_ETHERNET:
+ ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
+ rec->gid_type, port_num,
+ rec->net, rec->ifindex,
+ &gid_index);
+ break;
+ case IB_LINK_LAYER_INFINIBAND:
+ ret = ib_find_cached_gid(device, &rec->port_gid,
+ IB_GID_TYPE_IB, NULL, 0, &p,
+ &gid_index);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
if (ret)
return ret;
Index: sys/ofed/drivers/infiniband/core/peer_mem.c
===================================================================
--- sys/ofed/drivers/infiniband/core/peer_mem.c
+++ sys/ofed/drivers/infiniband/core/peer_mem.c
@@ -224,11 +224,15 @@
}
/* access to that peer client is under its lock - no extra lock is needed */
-unsigned long ib_peer_insert_context(struct ib_peer_memory_client *ib_peer_client,
- void *context)
+int ib_peer_insert_context(struct ib_peer_memory_client *ib_peer_client,
+ void *context,
+ unsigned long *context_ticket)
{
struct core_ticket *core_ticket = kzalloc(sizeof(*core_ticket), GFP_KERNEL);
+ if (!core_ticket)
+ return -ENOMEM;
+
ib_peer_client->last_ticket++;
core_ticket->context = context;
core_ticket->key = ib_peer_client->last_ticket;
@@ -236,7 +240,8 @@
list_add_tail(&core_ticket->ticket_list,
&ib_peer_client->core_ticket_list);
- return core_ticket->key;
+ *context_ticket = core_ticket->key;
+ return 0;
}
int ib_peer_remove_context(struct ib_peer_memory_client *ib_peer_client,
Index: sys/ofed/drivers/infiniband/core/roce_gid_cache.c
===================================================================
--- /dev/null
+++ sys/ofed/drivers/infiniband/core/roce_gid_cache.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <linux/netdevice.h>
+#include <rdma/ib_cache.h>
+
+#include "core_priv.h"
+
+union ib_gid zgid;
+EXPORT_SYMBOL_GPL(zgid);
+
+static const struct ib_gid_attr zattr;
+
+enum gid_attr_find_mask {
+ GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 0,
+ GID_ATTR_FIND_MASK_NETDEV = 1UL << 1,
+};
+
+static const char * const gid_type_str[] = {
+ [IB_GID_TYPE_IB] = "IB/RoCE V1",
+ [IB_GID_TYPE_ROCE_V2] = "RoCE V2",
+ [IB_GID_TYPE_ROCE_V1_5] = "RoCE V1.5",
+};
+
+static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
+{
+ if (dev->if_addrlen != ETH_ALEN)
+ return -1;
+ memcpy(eui, IF_LLADDR(dev), 3);
+ memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
+
+ /* NOTE: The scope ID is added by the GID to IP conversion */
+
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[0] ^= 2;
+ return 0;
+}
+
+static inline int start_port(struct ib_device *ib_dev)
+{
+ return (ib_dev->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
+}
+
+struct dev_put_rcu {
+ struct rcu_head rcu;
+ struct net_device *ndev;
+};
+
+const char *roce_gid_cache_type_str(enum ib_gid_type gid_type)
+{
+ if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
+ return gid_type_str[gid_type];
+
+ return "Invalid GID type";
+}
+EXPORT_SYMBOL_GPL(roce_gid_cache_type_str);
+
+int roce_gid_cache_parse_gid_str(const char *buf)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
+ if (gid_type_str[i] && !strcmp(buf, gid_type_str[i]))
+ return i;
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(roce_gid_cache_parse_gid_str);
+
+static void put_ndev(struct rcu_head *rcu)
+{
+ struct dev_put_rcu *put_rcu =
+ container_of(rcu, struct dev_put_rcu, rcu);
+
+ dev_put(put_rcu->ndev);
+ kfree(put_rcu);
+}
+
+static int write_gid(struct ib_device *ib_dev, u8 port,
+ struct ib_roce_gid_cache *cache, int ix,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ unsigned int orig_seq;
+ int ret;
+ struct dev_put_rcu *put_rcu;
+ struct net_device *old_net_dev;
+
+ orig_seq = cache->data_vec[ix].seq;
+ cache->data_vec[ix].seq = -1;
+ /* Ensure that all readers will see invalid sequence
+ * identifier before starting the actual GID update.
+ */
+ wmb();
+
+ ret = ib_dev->modify_gid(ib_dev, port, ix, gid, attr,
+ &cache->data_vec[ix].context);
+
+ old_net_dev = cache->data_vec[ix].attr.ndev;
+ if (old_net_dev && old_net_dev != attr->ndev) {
+ put_rcu = kmalloc(sizeof(*put_rcu), GFP_KERNEL);
+ if (put_rcu) {
+ put_rcu->ndev = old_net_dev;
+ call_rcu(&put_rcu->rcu, put_ndev);
+ } else {
+ pr_warn("roce_gid_cache: can't allocate rcu context, using synchronize\n");
+ synchronize_rcu();
+ dev_put(old_net_dev);
+ }
+ }
+ /* if modify_gid failed, just delete the old gid */
+ if (ret || !memcmp(gid, &zgid, sizeof(*gid))) {
+ gid = &zgid;
+ attr = &zattr;
+ cache->data_vec[ix].context = NULL;
+ }
+ memcpy(&cache->data_vec[ix].gid, gid, sizeof(*gid));
+ memcpy(&cache->data_vec[ix].attr, attr, sizeof(*attr));
+ if (cache->data_vec[ix].attr.ndev &&
+ cache->data_vec[ix].attr.ndev != old_net_dev)
+ dev_hold(cache->data_vec[ix].attr.ndev);
+
+ /* Ensure that all cached gid data updating is finished before
+ * marking the entry as available.
+ */
+ wmb();
+
+ if (++orig_seq == (unsigned int)-1)
+ orig_seq = 0;
+ ACCESS_ONCE(cache->data_vec[ix].seq) = orig_seq;
+
+ if (!ret) {
+ struct ib_event event;
+
+ event.device = ib_dev;
+ event.element.port_num = port;
+ event.event = IB_EVENT_GID_CHANGE;
+
+ ib_dispatch_event(&event);
+ }
+ return ret;
+}
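+
+/*
+ * write_gid() and its readers (find_gid, roce_gid_cache_get_gid) follow a
+ * seqlock-like protocol: the writer sets seq to -1, issues a wmb(), updates
+ * the entry, issues another wmb() and only then publishes the incremented
+ * sequence number.  Readers snapshot seq, use rmb() around their copy and
+ * skip the entry (or return -EAGAIN) if seq was -1 or changed underneath
+ * them.
+ */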
+
+static int find_gid(struct ib_roce_gid_cache *cache, union ib_gid *gid,
+ const struct ib_gid_attr *val, unsigned long mask)
+{
+ int i;
+ unsigned int orig_seq;
+
+ for (i = 0; i < cache->sz; i++) {
+ struct ib_gid_attr *attr = &cache->data_vec[i].attr;
+
+ orig_seq = cache->data_vec[i].seq;
+ if (orig_seq == -1U)
+ continue;
+ /* Make sure the sequence number we remember was read
+ * before the gid cache entry content is read.
+ */
+ rmb();
+
+ if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
+ attr->gid_type != val->gid_type)
+ continue;
+
+ if (memcmp(gid, &cache->data_vec[i].gid, sizeof(*gid)))
+ continue;
+
+ if (mask & GID_ATTR_FIND_MASK_NETDEV &&
+ attr->ndev != val->ndev)
+ continue;
+
+ /* We have a match, verify that the data we
+ * compared is valid. Make sure that the
+ * sequence number we read is the last to be
+ * read.
+ */
+ rmb();
+ if (orig_seq == ACCESS_ONCE(cache->data_vec[i].seq))
+ return i;
+ /* The sequence number changed under our feet,
+ * the GID entry is invalid. Continue to the
+ * next entry.
+ */
+ }
+
+ return -1;
+}
+
+static void make_default_gid(struct net_device *dev, union ib_gid *gid)
+{
+ gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ addrconf_ifid_eui48(&gid->raw[8], dev);
+}
+
+int roce_add_gid(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr)
+{
+ struct ib_roce_gid_cache *cache;
+ int ix;
+ int ret = 0;
+ struct net_device *idev;
+
+ if (!ib_dev->cache.roce_gid_cache)
+ return -ENOSYS;
+
+ cache = ib_dev->cache.roce_gid_cache[port - start_port(ib_dev)];
+
+ if (!cache || !cache->active)
+ return -ENOSYS;
+
+ if (!memcmp(gid, &zgid, sizeof(*gid)))
+ return -EINVAL;
+
+ if (ib_dev->get_netdev) {
+ rcu_read_lock();
+ idev = ib_dev->get_netdev(ib_dev, port);
+ if (idev && attr->ndev != idev) {
+ union ib_gid default_gid;
+
+ /* Adding default GIDs is not permitted */
+ make_default_gid(idev, &default_gid);
+ if (!memcmp(gid, &default_gid, sizeof(*gid))) {
+ rcu_read_unlock();
+ return -EPERM;
+ }
+ }
+ rcu_read_unlock();
+ }
+
+ mutex_lock(&cache->lock);
+
+ ix = find_gid(cache, gid, attr, GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_NETDEV);
+ if (ix >= 0)
+ goto out_unlock;
+
+ ix = find_gid(cache, &zgid, NULL, 0);
+ if (ix < 0) {
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+
+ write_gid(ib_dev, port, cache, ix, gid, attr);
+
+out_unlock:
+ mutex_unlock(&cache->lock);
+ return ret;
+}
+
+int roce_del_gid(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr)
+{
+ struct ib_roce_gid_cache *cache;
+ union ib_gid default_gid;
+ int ix;
+
+ if (!ib_dev->cache.roce_gid_cache)
+ return 0;
+
+ cache = ib_dev->cache.roce_gid_cache[port - start_port(ib_dev)];
+
+ if (!cache || !cache->active)
+ return -ENOSYS;
+
+ if (attr->ndev) {
+ /* Deleting default GIDs is not permitted */
+ make_default_gid(attr->ndev, &default_gid);
+ if (!memcmp(gid, &default_gid, sizeof(*gid)))
+ return -EPERM;
+ }
+
+ mutex_lock(&cache->lock);
+
+ ix = find_gid(cache, gid, attr,
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_NETDEV);
+ if (ix < 0)
+ goto out_unlock;
+
+ write_gid(ib_dev, port, cache, ix, &zgid, &zattr);
+
+out_unlock:
+ mutex_unlock(&cache->lock);
+ return 0;
+}
+
+int roce_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+ struct net_device *ndev)
+{
+ struct ib_roce_gid_cache *cache;
+ int ix;
+
+ if (!ib_dev->cache.roce_gid_cache)
+ return 0;
+
+ cache = ib_dev->cache.roce_gid_cache[port - start_port(ib_dev)];
+
+ if (!cache || !cache->active)
+ return -ENOSYS;
+
+ mutex_lock(&cache->lock);
+
+ for (ix = 0; ix < cache->sz; ix++)
+ if (cache->data_vec[ix].attr.ndev == ndev)
+ write_gid(ib_dev, port, cache, ix, &zgid, &zattr);
+
+ mutex_unlock(&cache->lock);
+ return 0;
+}
+
+int roce_sync_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+ struct list_head *list)
+{
+ struct ib_roce_gid_cache *cache;
+ int ix;
+
+ if (!ib_dev->cache.roce_gid_cache)
+ return 0;
+
+ cache = ib_dev->cache.roce_gid_cache[port - start_port(ib_dev)];
+
+ if (!cache || !cache->active)
+ return -ENOSYS;
+
+ mutex_lock(&cache->lock);
+
+ for (ix = 0; ix < cache->sz; ix++) {
+ bool found = false;
+ struct roce_netdev_list *entry;
+
+ list_for_each_entry(entry, list, list) {
+ if (cache->data_vec[ix].attr.ndev == entry->ndev) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ write_gid(ib_dev, port, cache, ix, &zgid, &zattr);
+ }
+
+ mutex_unlock(&cache->lock);
+ return 0;
+}
+
+int roce_gid_cache_get_gid(struct ib_device *ib_dev, u8 port, int index,
+ union ib_gid *gid, struct ib_gid_attr *attr)
+{
+ struct ib_roce_gid_cache *cache;
+ union ib_gid local_gid;
+ struct ib_gid_attr local_attr;
+ unsigned int orig_seq;
+
+ if (!ib_dev->cache.roce_gid_cache)
+ return -EINVAL;
+
+ cache = ib_dev->cache.roce_gid_cache[port - start_port(ib_dev)];
+
+ if (!cache || !cache->active)
+ return -ENOSYS;
+
+ if (index < 0 || index >= cache->sz)
+ return -EINVAL;
+
+ orig_seq = ACCESS_ONCE(cache->data_vec[index].seq);
+ /* Make sure we read the sequence number before copying the
+ * gid to local storage. */
+ rmb();
+
+ memcpy(&local_gid, &cache->data_vec[index].gid, sizeof(local_gid));
+ memcpy(&local_attr, &cache->data_vec[index].attr, sizeof(local_attr));
+ /* Ensure the local copy completed reading before verifying
+ * the new sequence number. */
+ rmb();
+
+ if (orig_seq == -1 ||
+ orig_seq != ACCESS_ONCE(cache->data_vec[index].seq))
+ return -EAGAIN;
+
+ memcpy(gid, &local_gid, sizeof(*gid));
+ if (attr)
+ memcpy(attr, &local_attr, sizeof(*attr));
+ return 0;
+}
+
+static int _roce_gid_cache_find_gid(struct ib_device *ib_dev, union ib_gid *gid,
+ const struct ib_gid_attr *val,
+ unsigned long mask,
+ u8 *port, u16 *index)
+{
+ struct ib_roce_gid_cache *cache;
+ u8 p;
+ int local_index;
+
+ if (!ib_dev->cache.roce_gid_cache)
+ return -ENOENT;
+
+ for (p = 0; p < ib_dev->phys_port_cnt; p++) {
+ if (rdma_port_get_link_layer(ib_dev, p + start_port(ib_dev)) !=
+ IB_LINK_LAYER_ETHERNET)
+ continue;
+ cache = ib_dev->cache.roce_gid_cache[p];
+ if (!cache || !cache->active)
+ continue;
+ local_index = find_gid(cache, gid, val, mask);
+ if (local_index >= 0) {
+ if (index)
+ *index = local_index;
+ if (port)
+ *port = p + start_port(ib_dev);
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static int get_netdev_from_ifindex(struct net *net, int if_index,
+ struct ib_gid_attr *gid_attr_val)
+{
+ if (if_index && net) {
+ rcu_read_lock();
+ gid_attr_val->ndev = dev_get_by_index(net, if_index);
+ rcu_read_unlock();
+ if (gid_attr_val->ndev)
+ return GID_ATTR_FIND_MASK_NETDEV;
+ }
+ return 0;
+}
+
+int roce_gid_cache_find_gid(struct ib_device *ib_dev, union ib_gid *gid,
+ enum ib_gid_type gid_type, struct net *net,
+ int if_index, u8 *port, u16 *index)
+{
+ unsigned long mask = GID_ATTR_FIND_MASK_GID_TYPE;
+ struct ib_gid_attr gid_attr_val = {.gid_type = gid_type};
+
+ mask |= get_netdev_from_ifindex(net, if_index, &gid_attr_val);
+
+ return _roce_gid_cache_find_gid(ib_dev, gid, &gid_attr_val,
+ mask, port, index);
+}
+
+int roce_gid_cache_find_gid_by_port(struct ib_device *ib_dev, union ib_gid *gid,
+ enum ib_gid_type gid_type, u8 port,
+ struct net *net, int if_index, u16 *index)
+{
+ int local_index;
+ struct ib_roce_gid_cache *cache;
+ unsigned long mask = GID_ATTR_FIND_MASK_GID_TYPE;
+ struct ib_gid_attr val = {.gid_type = gid_type};
+
+ if (!ib_dev->cache.roce_gid_cache || port < start_port(ib_dev) ||
+ port >= (start_port(ib_dev) + ib_dev->phys_port_cnt))
+ return -ENOENT;
+
+ cache = ib_dev->cache.roce_gid_cache[port - start_port(ib_dev)];
+ if (!cache || !cache->active)
+ return -ENOENT;
+
+ mask |= get_netdev_from_ifindex(net, if_index, &val);
+
+ local_index = find_gid(cache, gid, &val, mask);
+ if (local_index >= 0) {
+ if (index)
+ *index = local_index;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+int roce_gid_cache_find_gid_by_filter(struct ib_device *ib_dev,
+ union ib_gid *gid,
+ u8 port,
+ bool (*filter)(const union ib_gid *,
+ const struct ib_gid_attr *,
+ void *),
+ void *context,
+ u16 *index)
+{
+ struct ib_roce_gid_cache *cache;
+ unsigned int i;
+ bool found = false;
+
+ if (!ib_dev->cache.roce_gid_cache)
+ return -ENOSYS;
+
+ if (port < start_port(ib_dev) ||
+ port > start_port(ib_dev) + ib_dev->phys_port_cnt ||
+ rdma_port_get_link_layer(ib_dev, port) !=
+ IB_LINK_LAYER_ETHERNET)
+ return -ENOSYS;
+
+ cache = ib_dev->cache.roce_gid_cache[port - start_port(ib_dev)];
+
+ if (!cache || !cache->active)
+ return -ENOENT;
+
+ for (i = 0; i < cache->sz; i++) {
+ unsigned int orig_seq;
+ struct ib_gid_attr attr;
+
+ orig_seq = cache->data_vec[i].seq;
+ if (orig_seq == -1)
+ continue;
+ /* Make sure the sequence number we remember was read
+ * before the gid cache entry content is read.
+ */
+ rmb();
+
+ if (memcmp(gid, &cache->data_vec[i].gid, sizeof(*gid)))
+ continue;
+
+ memcpy(&attr, &cache->data_vec[i].attr, sizeof(attr));
+
+ rcu_read_lock();
+
+ /* Make sure we finished reading the attribute */
+ rmb();
+ if (orig_seq == ACCESS_ONCE(cache->data_vec[i].seq))
+ if (!filter || filter(gid, &attr, context))
+ found = true;
+
+ rcu_read_unlock();
+
+ if (found)
+ break;
+ }
+
+ if (!found)
+ return -ENOENT;
+
+ if (index)
+ *index = i;
+ return 0;
+}
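+
+/*
+ * Illustrative filter (match_gid_type is a hypothetical example, not a
+ * helper provided here): a caller could restrict the search above to
+ * entries of a given GID type, e.g.
+ *
+ *	static bool match_gid_type(const union ib_gid *gid,
+ *				   const struct ib_gid_attr *attr, void *ctx)
+ *	{
+ *		return attr->gid_type == *(enum ib_gid_type *)ctx;
+ *	}
+ *
+ *	roce_gid_cache_find_gid_by_filter(ib_dev, gid, port,
+ *					  match_gid_type, &wanted_type,
+ *					  &index);
+ */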
+
+static struct ib_roce_gid_cache *alloc_roce_gid_cache(int sz)
+{
+ struct ib_roce_gid_cache *cache =
+ kzalloc(sizeof(struct ib_roce_gid_cache), GFP_KERNEL);
+ if (!cache)
+ return NULL;
+
+ cache->data_vec = kcalloc(sz, sizeof(*cache->data_vec), GFP_KERNEL);
+ if (!cache->data_vec)
+ goto err_free_cache;
+
+ mutex_init(&cache->lock);
+
+ cache->sz = sz;
+
+ return cache;
+
+err_free_cache:
+ kfree(cache);
+ return NULL;
+}
+
+static void free_roce_gid_cache(struct ib_device *ib_dev, u8 port)
+{
+ int i;
+ struct ib_roce_gid_cache *cache = ib_dev->cache.roce_gid_cache[port - 1];
+
+ if (!cache)
+ return;
+
+ for (i = 0; i < cache->sz; ++i) {
+ if (memcmp(&cache->data_vec[i].gid, &zgid,
+ sizeof(cache->data_vec[i].gid)))
+ write_gid(ib_dev, port, cache, i, &zgid, &zattr);
+ }
+ kfree(cache->data_vec);
+ kfree(cache);
+}
+
+static void set_roce_gid_cache_active(struct ib_roce_gid_cache *cache,
+ int active)
+{
+ if (!cache)
+ return;
+
+ cache->active = active;
+}
+
+int roce_gid_cache_set_default_gid(struct ib_device *ib_dev, u8 port,
+ struct net_device *ndev,
+ unsigned long gid_type_mask,
+ enum roce_gid_cache_default_mode mode)
+{
+ union ib_gid gid;
+ struct ib_gid_attr gid_attr;
+ struct ib_roce_gid_cache *cache;
+ unsigned int gid_type;
+ unsigned int gid_index = 0;
+
+ cache = ib_dev->cache.roce_gid_cache[port - 1];
+
+ if (!cache)
+ goto done;
+
+ make_default_gid(ndev, &gid);
+ memset(&gid_attr, 0, sizeof(gid_attr));
+ gid_attr.ndev = ndev;
+ for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
+ union ib_gid current_gid;
+ struct ib_gid_attr current_gid_attr;
+
+ if (1UL << gid_type & ~gid_type_mask)
+ continue;
+
+ gid_attr.gid_type = gid_type;
+
+ if (!roce_gid_cache_get_gid(ib_dev, port, gid_index,
+ &current_gid, &current_gid_attr) &&
+ !memcmp(&gid, &current_gid, sizeof(gid)) &&
+ !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)) &&
+ mode == ROCE_GID_CACHE_DEFAULT_MODE_SET) {
+ ++gid_index; /* XXX bugfix */
+ continue;
+ }
+
+ mutex_lock(&cache->lock);
+ if (write_gid(ib_dev, port, cache, gid_index, &zgid, &zattr)) {
+ pr_warn("roce_gid_cache: can't delete index %d for default gid %pI6\n",
+ gid_index, gid.raw);
+ mutex_unlock(&cache->lock);
+ ++gid_index;
+ continue;
+ }
+
+ if (mode == ROCE_GID_CACHE_DEFAULT_MODE_SET &&
+ write_gid(ib_dev, port, cache, gid_index, &gid, &gid_attr))
+ pr_warn("roce_gid_cache: unable to add default gid %pI6\n",
+ gid.raw);
+
+ mutex_unlock(&cache->lock);
+ ++gid_index;
+ }
+done:
+ return (gid_index);
+}
+
+static int roce_gid_cache_setup_one(struct ib_device *ib_dev)
+{
+ u8 port;
+ int err = 0;
+
+ if (!ib_dev->modify_gid)
+ return -ENOSYS;
+
+ ib_dev->cache.roce_gid_cache =
+ kcalloc(ib_dev->phys_port_cnt,
+ sizeof(*ib_dev->cache.roce_gid_cache), GFP_KERNEL);
+
+ if (!ib_dev->cache.roce_gid_cache) {
+ pr_warn("failed to allocate roce addr cache for %s\n",
+ ib_dev->name);
+ return -ENOMEM;
+ }
+
+ for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ if (rdma_port_get_link_layer(ib_dev, port + start_port(ib_dev))
+ != IB_LINK_LAYER_ETHERNET)
+ continue;
+ ib_dev->cache.roce_gid_cache[port] =
+ alloc_roce_gid_cache(ib_dev->gid_tbl_len[port]);
+ if (!ib_dev->cache.roce_gid_cache[port]) {
+ err = -ENOMEM;
+ goto rollback_cache_setup;
+ }
+ }
+ return 0;
+
+rollback_cache_setup:
+ for (port = 1; port <= ib_dev->phys_port_cnt; port++)
+ free_roce_gid_cache(ib_dev, port);
+
+ kfree(ib_dev->cache.roce_gid_cache);
+ ib_dev->cache.roce_gid_cache = NULL;
+ return err;
+}
+
+static void roce_gid_cache_cleanup_one(struct ib_device *ib_dev)
+{
+ u8 port;
+
+ if (!ib_dev->cache.roce_gid_cache)
+ return;
+
+ for (port = 1; port <= ib_dev->phys_port_cnt; port++)
+ free_roce_gid_cache(ib_dev, port);
+
+ kfree(ib_dev->cache.roce_gid_cache);
+ ib_dev->cache.roce_gid_cache = NULL;
+}
+
+static void roce_gid_cache_set_active_state(struct ib_device *ib_dev,
+ int active)
+{
+ u8 port;
+
+ if (!ib_dev->cache.roce_gid_cache)
+ return;
+
+ for (port = 0; port < ib_dev->phys_port_cnt; port++)
+ set_roce_gid_cache_active(ib_dev->cache.roce_gid_cache[port],
+ active);
+}
+
+int roce_gid_cache_is_active(struct ib_device *ib_dev, u8 port)
+{
+ return ib_dev->cache.roce_gid_cache &&
+ ib_dev->cache.roce_gid_cache[port - start_port(ib_dev)]->active;
+}
+
+static void roce_gid_cache_client_setup_one(struct ib_device *ib_dev)
+{
+ if (!roce_gid_cache_setup_one(ib_dev)) {
+ roce_gid_cache_set_active_state(ib_dev, 1);
+ if (roce_rescan_device(ib_dev)) {
+ roce_gid_cache_set_active_state(ib_dev, 0);
+ roce_gid_cache_cleanup_one(ib_dev);
+ }
+ }
+}
+
+static void roce_gid_cache_client_cleanup_work_handler(struct work_struct *work)
+{
+ struct ib_cache *ib_cache = container_of(work, struct ib_cache,
+ roce_gid_cache_cleanup_work);
+ struct ib_device *ib_dev = container_of(ib_cache, struct ib_device,
+ cache);
+
+ /* Make sure no gid update task is still referencing this device */
+ flush_workqueue(roce_gid_mgmt_wq);
+
+ /* No need to flush the system wq, even though we use it in
+ * roce_rescan_device, because we are guaranteed to run this
+ * on the system_wq after roce_rescan_device.
+ */
+
+ roce_gid_cache_cleanup_one(ib_dev);
+ ib_device_put(ib_dev);
+}
+
+static void roce_gid_cache_client_cleanup_one_work(struct ib_device *ib_dev)
+{
+ ib_device_hold(ib_dev);
+ INIT_WORK(&ib_dev->cache.roce_gid_cache_cleanup_work,
+ roce_gid_cache_client_cleanup_work_handler);
+ schedule_work(&ib_dev->cache.roce_gid_cache_cleanup_work);
+}
+
+static void roce_gid_cache_client_cleanup_one(struct ib_device *ib_dev)
+{
+ roce_gid_cache_set_active_state(ib_dev, 0);
+ roce_gid_cache_client_cleanup_one_work(ib_dev);
+}
+
+static struct ib_client cache_client = {
+ .name = "roce_gid_cache",
+ .add = roce_gid_cache_client_setup_one,
+ .remove = roce_gid_cache_client_cleanup_one
+};
+
+int __init roce_gid_cache_setup(void)
+{
+ roce_gid_mgmt_init();
+
+ return ib_register_client(&cache_client);
+}
+
+void __exit roce_gid_cache_cleanup(void)
+{
+ ib_unregister_client(&cache_client);
+
+ roce_gid_mgmt_cleanup();
+
+ rcu_barrier();
+}
Index: sys/ofed/drivers/infiniband/core/roce_gid_mgmt.c
===================================================================
--- /dev/null
+++ sys/ofed/drivers/infiniband/core/roce_gid_mgmt.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "core_priv.h"
+
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/rcupdate.h>
+
+#include <rdma/ib_cache.h>
+#include <rdma/ib_addr.h>
+
+#include <netinet6/scope6_var.h>
+
+struct workqueue_struct *roce_gid_mgmt_wq;
+
+enum gid_op_type {
+ GID_DEL = 0,
+ GID_ADD
+};
+
+struct roce_gid_scan_event_work {
+ struct work_struct work;
+ struct net_device *ndev;
+};
+
+struct roce_rescan_work {
+ struct work_struct work;
+ struct ib_device *ib_dev;
+};
+
+static const struct {
+ int flag_mask;
+ enum ib_gid_type gid_type;
+} PORT_CAP_TO_GID_TYPE[] = {
+ {IB_PORT_ROCE_V2, IB_GID_TYPE_ROCE_V2},
+ {IB_PORT_ROCE, IB_GID_TYPE_IB},
+ {IB_PORT_ROCE_V1_5, IB_GID_TYPE_ROCE_V1_5},
+};
+
+#define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
+
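+/*
+ * Query the port capability flags and translate them into a bitmask of
+ * the RoCE GID types (v1, v1.5, v2) supported on this port.
+ */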
+static unsigned long gid_type_mask_support(struct ib_device *ib_dev, u8 port)
+{
+ struct ib_port_attr pattr;
+ int i;
+ int err;
+ unsigned int ret_flags = 0;
+
+ err = ib_query_port(ib_dev, port, &pattr);
+ if (err) {
+ pr_warn("update_gid: ib_query_port() failed for %s, %d\n",
+ ib_dev->name, err);
+ return 0;
+ }
+
+ for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
+ if (pattr.port_cap_flags & PORT_CAP_TO_GID_TYPE[i].flag_mask)
+ ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
+
+ return ret_flags;
+}
+
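+/*
+ * Add or delete the given GID on a port, once for every GID type the
+ * port supports.
+ */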
+static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
+ u8 port, union ib_gid *gid, struct net_device *ndev)
+{
+ int i;
+ unsigned long gid_type_mask = gid_type_mask_support(ib_dev, port);
+ struct ib_gid_attr gid_attr;
+
+ memset(&gid_attr, 0, sizeof(gid_attr));
+ gid_attr.ndev = ndev;
+
+ for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
+ if ((1UL << i) & gid_type_mask) {
+ gid_attr.gid_type = i;
+ switch (gid_op) {
+ case GID_ADD:
+ roce_add_gid(ib_dev, port,
+ gid, &gid_attr);
+ break;
+ case GID_DEL:
+ roce_del_gid(ib_dev, port,
+ gid, &gid_attr);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+}
+
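+/*
+ * Port filter callbacks: match only the port whose attached netdev is
+ * the one passed as cookie, or match any port that has a netdev at all.
+ */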
+static int
+roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
+ struct net_device *idev, void *cookie)
+{
+ struct net_device *ndev = (struct net_device *)cookie;
+ if (idev == NULL)
+ return (0);
+ return (ndev == idev);
+}
+
+static int
+roce_gid_match_all(struct ib_device *ib_dev, u8 port,
+ struct net_device *idev, void *cookie)
+{
+ if (idev == NULL)
+ return (0);
+ return (1);
+}
+
+static int
+roce_gid_enum_netdev_default(struct ib_device *ib_dev,
+ u8 port, struct net_device *idev)
+{
+ unsigned long gid_type_mask;
+
+ gid_type_mask = gid_type_mask_support(ib_dev, port);
+
+ return roce_gid_cache_set_default_gid(ib_dev, port, idev, gid_type_mask,
+ ROCE_GID_CACHE_DEFAULT_MODE_SET);
+}
+
+#define ETH_IPOIB_DRV_NAME "ib"
+
+static inline int
+is_eth_ipoib_intf(struct net_device *dev)
+{
+ if (strncmp(dev->if_dname, ETH_IPOIB_DRV_NAME, 32))
+ return 0;
+ return 1;
+}
+
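+/*
+ * Synchronize the GID table of (device, port) with the addresses
+ * configured on the bound netdev and its VLAN interfaces: collect all
+ * IPv4/IPv6 addresses, add GIDs that are missing from the cache and
+ * delete cached GIDs whose address has gone away. The default GIDs at
+ * the start of the table are left in place.
+ */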
+static void
+roce_gid_update_addr_callback(struct ib_device *device, u8 port,
+ struct net_device *ndev, void *cookie)
+{
+ struct ipx_entry {
+ TAILQ_ENTRY(ipx_entry) entry;
+ union ipx_addr {
+ struct sockaddr sa[0];
+ struct sockaddr_in v4;
+ struct sockaddr_in6 v6;
+ } ipx_addr;
+ };
+ struct ipx_entry *entry;
+ struct net_device *idev;
+ struct ifaddr *ifa;
+ union ib_gid gid;
+ int default_gids;
+ u32 scope_id;
+ u16 index_num;
+ int i;
+
+ TAILQ_HEAD(, ipx_entry) ipx_head;
+
+ TAILQ_INIT(&ipx_head);
+
+ /* make sure default GIDs are in */
+ default_gids = roce_gid_enum_netdev_default(device, port, ndev);
+
+ scope_id = rdma_get_ipv6_scope_id(device, port);
+
+ IFNET_RLOCK();
+ TAILQ_FOREACH(idev, &V_ifnet, if_link) {
+ if (idev != ndev) {
+ if (idev->if_type != IFT_L2VLAN)
+ continue;
+ if (ndev != rdma_vlan_dev_real_dev(idev))
+ continue;
+ }
+
+ /* clone address information for IPv4 and IPv6 */
+ IF_ADDR_RLOCK(idev);
+#if defined(INET)
+ TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr == NULL ||
+ ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (entry == NULL) {
+ pr_warn("roce_gid_update_addr_callback: "
+ "couldn't allocate entry for IPv4 update\n");
+ continue;
+ }
+ entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
+ TAILQ_INSERT_TAIL(&ipx_head, entry, entry);
+ }
+#endif
+#if defined(INET6)
+ TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr == NULL ||
+ ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (entry == NULL) {
+ pr_warn("roce_gid_update_addr_callback: "
+ "couldn't allocate entry for IPv6 update\n");
+ continue;
+ }
+ entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
+ sa6_recoverscope(&entry->ipx_addr.v6);
+ TAILQ_INSERT_TAIL(&ipx_head, entry, entry);
+ }
+#endif
+ IF_ADDR_RUNLOCK(idev);
+ }
+ IFNET_RUNLOCK();
+
+ /* add missing GIDs, if any */
+ TAILQ_FOREACH(entry, &ipx_head, entry) {
+ unsigned long gid_type_mask = gid_type_mask_support(device, port);
+
+ if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
+ continue;
+
+ for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
+ if (!((1UL << i) & gid_type_mask))
+ continue;
+ /* check if entry found */
+ if (ib_find_cached_gid_by_port(device, &gid, i,
+ port, &init_net, ndev->if_index, &index_num) == 0)
+ break;
+ }
+ if (i != IB_GID_TYPE_SIZE)
+ continue;
+ /* add new GID */
+ update_gid(GID_ADD, device, port, &gid, ndev);
+ }
+
+ /* remove stale GIDs, if any */
+ for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, NULL) == 0; i++) {
+ union ipx_addr ipx;
+
+ /* don't delete empty entries */
+ if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
+ continue;
+
+ /* zero default */
+ memset(&ipx, 0, sizeof(ipx));
+
+ if (rdma_gid2ip(&ipx.sa[0], &gid, scope_id) != 0)
+ continue;
+
+ TAILQ_FOREACH(entry, &ipx_head, entry) {
+ if (memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
+ break;
+ }
+ /* check if entry found */
+ if (entry != NULL)
+ continue;
+ /* remove GID */
+ update_gid(GID_DEL, device, port, &gid, ndev);
+ }
+
+ while ((entry = TAILQ_FIRST(&ipx_head))) {
+ TAILQ_REMOVE(&ipx_head, entry, entry);
+ kfree(entry);
+ }
+}
+
+static void
+roce_gid_queue_scan_event_handler(struct work_struct *_work)
+{
+ struct roce_gid_scan_event_work *work =
+ container_of(_work, struct roce_gid_scan_event_work, work);
+
+ ib_enum_roce_ports_of_netdev(roce_gid_match_netdev, work->ndev,
+ roce_gid_update_addr_callback, NULL);
+
+ dev_put(work->ndev);
+ kfree(work);
+}
+
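+/*
+ * Queue a deferred GID rescan for the given netdev on the ordered
+ * roce_gid_mgmt workqueue. VLAN interfaces are mapped to their parent
+ * Ethernet interface and IPoIB interfaces are ignored.
+ */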
+static void
+roce_gid_queue_scan_event(struct net_device *ndev)
+{
+ struct roce_gid_scan_event_work *work;
+
+retry:
+ if (is_eth_ipoib_intf(ndev))
+ return;
+
+ if (ndev->if_type != IFT_ETHER) {
+ if (ndev->if_type == IFT_L2VLAN) {
+ ndev = rdma_vlan_dev_real_dev(ndev);
+ if (ndev != NULL)
+ goto retry;
+ }
+ return;
+ }
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
+ return;
+ }
+
+ INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
+ dev_hold(ndev);
+
+ work->ndev = ndev;
+
+ queue_work(roce_gid_mgmt_wq, &work->work);
+}
+
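+/*
+ * Notifier callback for interface and address events: any register,
+ * unregister or address change schedules a GID rescan for that netdev.
+ */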
+static int
+inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct net_device *ndev = ptr;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ case NETDEV_UNREGISTER:
+ case NETDEV_CHANGEADDR:
+ case NETDEV_CHANGEIFADDR:
+ roce_gid_queue_scan_event(ndev);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nb_inetaddr = {
+ .notifier_call = inetaddr_event
+};
+
+static void
+roce_rescan_device_handler(struct work_struct *_work)
+{
+ struct roce_rescan_work *work =
+ container_of(_work, struct roce_rescan_work, work);
+
+ ib_dev_roce_ports_of_netdev(work->ib_dev, roce_gid_match_all, NULL,
+ roce_gid_update_addr_callback, NULL);
+ kfree(work);
+}
+
+/* Caller must flush system workqueue before removing the ib_device */
+int roce_rescan_device(struct ib_device *ib_dev)
+{
+ struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);
+
+ if (!work)
+ return -ENOMEM;
+
+ work->ib_dev = ib_dev;
+ INIT_WORK(&work->work, roce_rescan_device_handler);
+ queue_work(roce_gid_mgmt_wq, &work->work);
+
+ return 0;
+}
+
+int __init roce_gid_mgmt_init(void)
+{
+ roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
+
+ if (!roce_gid_mgmt_wq) {
+ pr_warn("roce_gid_mgmt: can't allocate work queue\n");
+ return -ENOMEM;
+ }
+
+ register_inetaddr_notifier(&nb_inetaddr);
+
+ /* We rely on the netdevice notifier to enumerate all
+ * existing devices in the system. Register this notifier
+ * last to make sure we will not miss any IP add/del
+ * callbacks.
+ */
+ register_netdevice_notifier(&nb_inetaddr);
+
+ return 0;
+}
+
+void __exit roce_gid_mgmt_cleanup(void)
+{
+ unregister_inetaddr_notifier(&nb_inetaddr);
+ unregister_netdevice_notifier(&nb_inetaddr);
+ /* Ensure all gid deletion tasks complete before we go down,
+ * to avoid any reference to freed memory. By the time
+ * ib-core is removed, all physical devices have been removed,
+ * so no issue with remaining hardware contexts.
+ */
+ synchronize_rcu();
+ drain_workqueue(roce_gid_mgmt_wq);
+ destroy_workqueue(roce_gid_mgmt_wq);
+}
Index: sys/ofed/drivers/infiniband/core/sa.h
===================================================================
--- sys/ofed/drivers/infiniband/core/sa.h
+++ sys/ofed/drivers/infiniband/core/sa.h
@@ -53,7 +53,7 @@
u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
+ int timeout_ms, int retries, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_mcmember_rec *resp,
void *context),
Index: sys/ofed/drivers/infiniband/core/sa_query.c
===================================================================
--- sys/ofed/drivers/infiniband/core/sa_query.c
+++ sys/ofed/drivers/infiniband/core/sa_query.c
@@ -41,14 +41,12 @@
#include <linux/kref.h>
#include <linux/idr.h>
#include <linux/workqueue.h>
-
+#include <linux/if_ether.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_cache.h>
#include "sa.h"
-MODULE_AUTHOR("Roland Dreier");
-MODULE_DESCRIPTION("InfiniBand subnet administration query support");
-MODULE_LICENSE("Dual BSD/GPL");
+/* InfiniBand subnet administration query support */
struct ib_sa_sm_ah {
struct ib_ah *ah;
@@ -419,6 +417,11 @@
ah_attr.dlid = port_attr.sm_lid;
ah_attr.sl = port_attr.sm_sl;
ah_attr.port_num = port->port_num;
+ if (port_attr.grh_required) {
+ ah_attr.ah_flags = IB_AH_GRH;
+ ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(IB_SA_WELL_KNOWN_GID_PREFIX);
+ ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
+ }
new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
if (IS_ERR(new_ah->ah)) {
@@ -545,7 +548,8 @@
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = rec->dgid;
- ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
+ ret = ib_find_cached_gid(device, &rec->sgid, rec->gid_type,
+ rec->net, rec->ifindex, &port_num,
&gid_index);
if (ret)
return ret;
@@ -556,13 +560,8 @@
ah_attr->grh.traffic_class = rec->traffic_class;
}
if (force_grh) {
- memcpy(ah_attr->dmac, rec->dmac, 6);
- ah_attr->vlan_id = rec->vlan_id;
- } else {
- memset(ah_attr->dmac, 0, 6);
- ah_attr->vlan_id = 0xffff;
+ memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
}
-
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -616,7 +615,7 @@
spin_unlock_irqrestore(&tid_lock, flags);
}
-static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
+static int send_mad(struct ib_sa_query *query, int timeout_ms, int retries, gfp_t gfp_mask)
{
unsigned long flags;
int ret, id;
@@ -633,6 +632,7 @@
return ret;
query->mad_buf->timeout_ms = timeout_ms;
+ query->mad_buf->retries = retries;
query->mad_buf->context[0] = query;
query->id = id;
@@ -657,6 +657,12 @@
}
EXPORT_SYMBOL(ib_sa_unpack_path);
+void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
+{
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
+}
+EXPORT_SYMBOL(ib_sa_pack_path);
+
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
@@ -669,10 +675,10 @@
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
mad->data, &rec);
- rec.vlan_id = 0xffff;
+ rec.net = NULL;
+ rec.ifindex = 0;
+ rec.gid_type = IB_GID_TYPE_IB;
memset(rec.dmac, 0, ETH_ALEN);
- memset(rec.smac, 0, ETH_ALEN);
-
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
@@ -683,7 +689,6 @@
kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}
-
/**
* ib_sa_path_rec_get - Start a Path get query
* @client:SA client
@@ -692,6 +697,7 @@
* @rec:Path Record to send in query
* @comp_mask:component mask to send in query
* @timeout_ms:time to wait for response
+ * @retries:retries to send for response
* @gfp_mask:GFP mask to use for internal allocations
* @callback:function called when query completes, times out or is
* canceled
@@ -710,15 +716,15 @@
* the query.
*/
int ib_sa_path_rec_get(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec,
- ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_path_rec *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_path_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, int retries, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_path_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
{
struct ib_sa_path_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
@@ -760,7 +766,7 @@
*sa_query = &query->sa_query;
- ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+ ret = send_mad(&query->sa_query, timeout_ms, retries, gfp_mask);
if (ret < 0)
goto err2;
@@ -808,6 +814,7 @@
* @rec:Service Record to send in request
* @comp_mask:component mask to send in request
* @timeout_ms:time to wait for response
+ * @retries:retries to send for response
* @gfp_mask:GFP mask to use for internal allocations
* @callback:function called when request completes, times out or is
* canceled
@@ -830,7 +837,7 @@
struct ib_device *device, u8 port_num, u8 method,
struct ib_sa_service_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
+ int timeout_ms, int retries, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_service_rec *resp,
void *context),
@@ -883,7 +890,7 @@
*sa_query = &query->sa_query;
- ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+ ret = send_mad(&query->sa_query, timeout_ms, retries, gfp_mask);
if (ret < 0)
goto err2;
@@ -927,7 +934,7 @@
u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
+ int timeout_ms, int retries, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_mcmember_rec *resp,
void *context),
@@ -975,7 +982,7 @@
*sa_query = &query->sa_query;
- ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+ ret = send_mad(&query->sa_query, timeout_ms, retries, gfp_mask);
if (ret < 0)
goto err2;
@@ -993,8 +1000,8 @@
/* Support GuidInfoRecord */
static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
- int status,
- struct ib_sa_mad *mad)
+ int status,
+ struct ib_sa_mad *mad)
{
struct ib_sa_guidinfo_query *query =
container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
@@ -1018,7 +1025,7 @@
struct ib_device *device, u8 port_num,
struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method,
- int timeout_ms, gfp_t gfp_mask,
+ int timeout_ms, int retries, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_guidinfo_rec *resp,
void *context),
@@ -1027,7 +1034,7 @@
{
struct ib_sa_guidinfo_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
+ struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
int ret;
@@ -1048,15 +1055,15 @@
if (!query)
return -ENOMEM;
- query->sa_query.port = port;
+ query->sa_query.port = port;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
goto err1;
ib_sa_client_get(client);
query->sa_query.client = client;
- query->callback = callback;
- query->context = context;
+ query->callback = callback;
+ query->context = context;
mad = query->sa_query.mad_buf->mad;
init_mad(mad, agent);
@@ -1073,7 +1080,7 @@
*sa_query = &query->sa_query;
- ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+ ret = send_mad(&query->sa_query, timeout_ms, retries, gfp_mask);
if (ret < 0)
goto err2;
@@ -1196,7 +1203,7 @@
INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
if (ib_register_event_handler(&sa_dev->event_handler))
- goto reg_err;
+ goto err;
for (i = 0; i <= e - s; ++i)
if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND)
@@ -1204,14 +1211,10 @@
return;
-reg_err:
- ib_set_client_data(device, &sa_client, NULL);
- i = e - s;
err:
- for (; i >= 0; --i)
- if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND &&
- !IS_ERR(sa_dev->port[i].agent))
- ib_unregister_mad_agent(sa_dev->port[i].agent);
+ while (--i >= 0)
+ if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND)
+ ib_unregister_mad_agent(sa_dev->port[i].agent);
kfree(sa_dev);
Index: sys/ofed/drivers/infiniband/core/sysfs.c
===================================================================
--- sys/ofed/drivers/infiniband/core/sysfs.c
+++ sys/ofed/drivers/infiniband/core/sysfs.c
@@ -36,15 +36,26 @@
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/rcupdate.h>
+#include <linux/netdevice.h>
#include <linux/fs.h>
#include <linux/printk.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_pma.h>
+struct ib_port;
+
+struct gid_attr_group {
+ struct ib_port *port;
+ struct kobject kobj;
+ struct attribute_group ndev;
+ struct attribute_group type;
+};
struct ib_port {
struct kobject kobj;
struct ib_device *ibdev;
+ struct gid_attr_group *gid_attr_group;
struct attribute_group gid_group;
struct attribute_group pkey_group;
u8 port_num;
@@ -86,6 +97,24 @@
.show = port_attr_show
};
+static ssize_t gid_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct port_attribute *port_attr =
+ container_of(attr, struct port_attribute, attr);
+ struct ib_port *p = container_of(kobj, struct gid_attr_group,
+ kobj)->port;
+
+ if (!port_attr->show)
+ return -EIO;
+
+ return port_attr->show(p, port_attr, buf);
+}
+
+static const struct sysfs_ops gid_attr_sysfs_ops = {
+ .show = gid_attr_show
+};
+
static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
@@ -188,6 +217,9 @@
if (ret)
return ret;
+ if (!attr.active_speed && !attr.active_width)
+ return sprintf(buf, "0 GB/sec\n");
+
ib_active_speed_enum_to_rate(attr.active_speed,
&rate,
&speed);
@@ -232,8 +264,6 @@
return sprintf(buf, "%s\n", "InfiniBand");
case IB_LINK_LAYER_ETHERNET:
return sprintf(buf, "%s\n", "Ethernet");
- case IB_LINK_LAYER_SCIF:
- return sprintf(buf, "%s\n", "SCIF");
default:
return sprintf(buf, "%s\n", "Unknown");
}
@@ -262,6 +292,46 @@
NULL
};
+static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
+{
+ if (!gid_attr->ndev)
+ return -EINVAL;
+
+ return sprintf(buf, "%s\n", gid_attr->ndev->if_xname);
+}
+
+static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf)
+{
+ return sprintf(buf, "%s\n", roce_gid_cache_type_str(gid_attr->gid_type));
+}
+
+static ssize_t _show_port_gid_attr(struct ib_port *p,
+ struct port_attribute *attr,
+ char *buf,
+ size_t (*print)(struct ib_gid_attr *gid_attr,
+ char *buf))
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ union ib_gid gid;
+ struct ib_gid_attr gid_attr;
+ ssize_t ret;
+
+ rcu_read_lock();
+ ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
+ &gid_attr);
+ if (ret)
+ goto err;
+
+ ret = print(&gid_attr, buf);
+
+err:
+ rcu_read_unlock();
+ return ret;
+}
+
static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
@@ -270,13 +340,26 @@
union ib_gid gid;
ssize_t ret;
- ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid);
+ ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
if (ret)
return ret;
return sprintf(buf, GID_PRINT_FMT"\n",GID_PRINT_ARGS(gid.raw));
}
+static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
+ struct port_attribute *attr, char *buf)
+{
+ return _show_port_gid_attr(p, attr, buf, print_ndev);
+}
+
+static ssize_t show_port_gid_attr_gid_type(struct ib_port *p,
+ struct port_attribute *attr,
+ char *buf)
+{
+ return _show_port_gid_attr(p, attr, buf, print_gid_type);
+}
+
static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
@@ -292,124 +375,125 @@
return sprintf(buf, "0x%04x\n", pkey);
}
+#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
+struct port_table_attribute port_pma_attr_##_name = { \
+ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
+ .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+}
+
static ssize_t get_pma_counters(struct ib_port *p, struct port_attribute *attr,
- char *buf, int c_ext)
+ char *buf, int c_ext)
{
- struct port_table_attribute *tab_attr =
- container_of(attr, struct port_table_attribute, attr);
- int offset = tab_attr->index & 0xffff;
- int width = (tab_attr->index >> 16) & 0xff;
- struct ib_mad *in_mad = NULL;
- struct ib_mad *out_mad = NULL;
- ssize_t ret;
-
- if (!p->ibdev->process_mad)
- return -ENXIO;
-
- in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
- out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
- if (!in_mad || !out_mad) {
- ret = -ENOMEM;
- goto out;
- }
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ int offset = tab_attr->index & 0xffff;
+ int width = (tab_attr->index >> 16) & 0xff;
+ struct ib_mad *in_mad = NULL;
+ struct ib_mad *out_mad = NULL;
+ ssize_t ret;
- in_mad->mad_hdr.base_version = 1;
- in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
- in_mad->mad_hdr.class_version = 1;
- in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
- if (c_ext)
- in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS_EXT;
- else
- in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS;
-
- in_mad->data[41] = p->port_num; /* PortSelect field */
-
- if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
- p->port_num, NULL, NULL, in_mad, out_mad) &
- (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
- (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
- ret = -EINVAL;
- goto out;
- }
+ if (!p->ibdev->process_mad)
+ return -ENOSYS;
- switch (width) {
- case 4:
- ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
- (4 - (offset % 8))) & 0xf);
- break;
- case 8:
- ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
- break;
- case 16:
- ret = sprintf(buf, "%u\n",
- be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
- break;
- case 32:
- ret = sprintf(buf, "%u\n",
- be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
- break;
- case 64:
- ret = sprintf(buf, "%llu\n",
- (unsigned long long)be64_to_cpup((__be64 *)(out_mad->data + 40 + offset / 8)));
- break;
- default:
- ret = 0;
- }
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad) {
+ ret = -ENOMEM;
+ goto out;
+ }
-out:
- kfree(in_mad);
- kfree(out_mad);
+ in_mad->mad_hdr.base_version = 1;
+ in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
+ in_mad->mad_hdr.class_version = 1;
+ in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
+ if (c_ext)
+ in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS_EXT;
+ else
+ in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS;
+
+ in_mad->data[41] = p->port_num; /* PortSelect field */
+
+ if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
+ p->port_num, NULL, NULL, in_mad, out_mad) &
+ (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
+ (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
+ ret = -EINVAL;
+ goto out;
+ }
- return ret;
-}
+ switch (width) {
+ case 4:
+ ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
+ (4 - (offset % 8))) & 0xf);
+ break;
+ case 8:
+ ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
+ break;
+ case 16:
+ ret = sprintf(buf, "%u\n",
+ be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ case 32:
+ ret = sprintf(buf, "%u\n",
+ be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ case 64:
+ ret = sprintf(buf, "%lu\n",
+ be64_to_cpup((__be64 *)(out_mad->data + 40 +
+ offset / 8)));
+ break;
+ default:
+ ret = 0;
+ }
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
-#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
-struct port_table_attribute port_pma_attr_##_name = { \
- .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
- .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+ return ret;
}
static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+ char *buf)
{
- return get_pma_counters(p, attr, buf, 0);
+ return get_pma_counters(p, attr, buf, 0);
}
-static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
-static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
-static PORT_PMA_ATTR(link_downed , 2, 8, 56);
-static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
+static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
+static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
+static PORT_PMA_ATTR(link_downed , 2, 8, 56);
+static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80);
static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96);
-static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
+static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128);
-static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
+static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152);
static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156);
-static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
-static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
-static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
-static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
-static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
+static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
+static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
+static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
+static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
+static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
static struct attribute *pma_attrs[] = {
- &port_pma_attr_symbol_error.attr.attr,
- &port_pma_attr_link_error_recovery.attr.attr,
- &port_pma_attr_link_downed.attr.attr,
- &port_pma_attr_port_rcv_errors.attr.attr,
- &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
- &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
- &port_pma_attr_port_xmit_discards.attr.attr,
- &port_pma_attr_port_xmit_constraint_errors.attr.attr,
- &port_pma_attr_port_rcv_constraint_errors.attr.attr,
- &port_pma_attr_local_link_integrity_errors.attr.attr,
- &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
- &port_pma_attr_VL15_dropped.attr.attr,
- &port_pma_attr_port_xmit_data.attr.attr,
- &port_pma_attr_port_rcv_data.attr.attr,
- &port_pma_attr_port_xmit_packets.attr.attr,
- &port_pma_attr_port_rcv_packets.attr.attr,
- NULL
+ &port_pma_attr_symbol_error.attr.attr,
+ &port_pma_attr_link_error_recovery.attr.attr,
+ &port_pma_attr_link_downed.attr.attr,
+ &port_pma_attr_port_rcv_errors.attr.attr,
+ &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+ &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+ &port_pma_attr_port_xmit_discards.attr.attr,
+ &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+ &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+ &port_pma_attr_local_link_integrity_errors.attr.attr,
+ &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+ &port_pma_attr_VL15_dropped.attr.attr,
+ &port_pma_attr_port_xmit_data.attr.attr,
+ &port_pma_attr_port_rcv_data.attr.attr,
+ &port_pma_attr_port_xmit_packets.attr.attr,
+ &port_pma_attr_port_rcv_packets.attr.attr,
+ NULL
};
static struct attribute_group pma_group = {
@@ -417,42 +501,42 @@
.attrs = pma_attrs
};
-#define PORT_PMA_ATTR_EXT(_name, _counter, _width, _offset) \
-struct port_table_attribute port_pma_attr_ext_##_name = { \
- .attr = __ATTR(_name, S_IRUGO, show_pma_counter_ext, NULL), \
- .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+#define PORT_PMA_ATTR_EXT(_name, _counter, _width, _offset) \
+struct port_table_attribute port_pma_attr_ext_##_name = { \
+ .attr = __ATTR(_name, S_IRUGO, show_pma_counter_ext, NULL), \
+ .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
}
static ssize_t show_pma_counter_ext(struct ib_port *p,
- struct port_attribute *attr, char *buf)
+ struct port_attribute *attr, char *buf)
{
- return get_pma_counters(p, attr, buf, 1);
+ return get_pma_counters(p, attr, buf, 1);
}
-static PORT_PMA_ATTR_EXT(port_xmit_data_64 , 0, 64, 64);
-static PORT_PMA_ATTR_EXT(port_rcv_data_64 , 0, 64, 128);
-static PORT_PMA_ATTR_EXT(port_xmit_packets_64 , 0, 64, 192);
-static PORT_PMA_ATTR_EXT(port_rcv_packets_64 , 0, 64, 256);
-static PORT_PMA_ATTR_EXT(port_unicast_xmit_packets , 0, 64, 320);
-static PORT_PMA_ATTR_EXT(port_unicast_rcv_packets , 0, 64, 384);
-static PORT_PMA_ATTR_EXT(port_multicast_xmit_packets , 0, 64, 448);
-static PORT_PMA_ATTR_EXT(port_multicast_rcv_packets , 0, 64, 512);
+static PORT_PMA_ATTR_EXT(port_xmit_data_64 , 0, 64, 64);
+static PORT_PMA_ATTR_EXT(port_rcv_data_64 , 0, 64, 128);
+static PORT_PMA_ATTR_EXT(port_xmit_packets_64 , 0, 64, 192);
+static PORT_PMA_ATTR_EXT(port_rcv_packets_64 , 0, 64, 256);
+static PORT_PMA_ATTR_EXT(port_unicast_xmit_packets , 0, 64, 320);
+static PORT_PMA_ATTR_EXT(port_unicast_rcv_packets , 0, 64, 384);
+static PORT_PMA_ATTR_EXT(port_multicast_xmit_packets , 0, 64, 448);
+static PORT_PMA_ATTR_EXT(port_multicast_rcv_packets , 0, 64, 512);
static struct attribute *pma_attrs_ext[] = {
- &port_pma_attr_ext_port_xmit_data_64.attr.attr,
- &port_pma_attr_ext_port_rcv_data_64.attr.attr,
- &port_pma_attr_ext_port_xmit_packets_64.attr.attr,
- &port_pma_attr_ext_port_rcv_packets_64.attr.attr,
- &port_pma_attr_ext_port_unicast_xmit_packets.attr.attr,
- &port_pma_attr_ext_port_unicast_rcv_packets.attr.attr,
- &port_pma_attr_ext_port_multicast_xmit_packets.attr.attr,
- &port_pma_attr_ext_port_multicast_rcv_packets.attr.attr,
- NULL
+ &port_pma_attr_ext_port_xmit_data_64.attr.attr,
+ &port_pma_attr_ext_port_rcv_data_64.attr.attr,
+ &port_pma_attr_ext_port_xmit_packets_64.attr.attr,
+ &port_pma_attr_ext_port_rcv_packets_64.attr.attr,
+ &port_pma_attr_ext_port_unicast_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_unicast_rcv_packets.attr.attr,
+ &port_pma_attr_ext_port_multicast_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_multicast_rcv_packets.attr.attr,
+ NULL
};
static struct attribute_group pma_ext_group = {
- .name = "counters_ext",
- .attrs = pma_attrs_ext
+ .name = "counters_ext",
+ .attrs = pma_attrs_ext
};
static void ib_port_release(struct kobject *kobj)
@@ -461,25 +545,58 @@
struct attribute *a;
int i;
- for (i = 0; (a = p->gid_group.attrs[i]); ++i)
- kfree(a);
+ if (p->gid_group.attrs) {
+ for (i = 0; (a = p->gid_group.attrs[i]); ++i)
+ kfree(a);
- kfree(p->gid_group.attrs);
+ kfree(p->gid_group.attrs);
+ }
- for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
- kfree(a);
+ if (p->pkey_group.attrs) {
+ for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
+ kfree(a);
- kfree(p->pkey_group.attrs);
+ kfree(p->pkey_group.attrs);
+ }
kfree(p);
}
+static void ib_port_gid_attr_release(struct kobject *kobj)
+{
+ struct gid_attr_group *g = container_of(kobj, struct gid_attr_group,
+ kobj);
+ struct attribute *a;
+ int i;
+
+ if (g->ndev.attrs) {
+ for (i = 0; (a = g->ndev.attrs[i]); ++i)
+ kfree(a);
+
+ kfree(g->ndev.attrs);
+ }
+
+ if (g->type.attrs) {
+ for (i = 0; (a = g->type.attrs[i]); ++i)
+ kfree(a);
+
+ kfree(g->type.attrs);
+ }
+
+ kfree(g);
+}
+
static struct kobj_type port_type = {
.release = ib_port_release,
.sysfs_ops = &port_sysfs_ops,
.default_attrs = port_default_attrs
};
+static struct kobj_type gid_attr_type = {
+ .sysfs_ops = &gid_attr_sysfs_ops,
+ .release = ib_port_gid_attr_release
+};
+
static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -549,8 +666,8 @@
}
static int add_port(struct ib_device *device, int port_num,
- int (*port_callback)(struct ib_device *,
- u8, struct kobject *))
+ int (*port_callback)(struct ib_device *,
+ u8, struct kobject *))
{
struct ib_port *p;
struct ib_port_attr attr;
@@ -571,56 +688,123 @@
ret = kobject_init_and_add(&p->kobj, &port_type,
device->ports_parent,
"%d", port_num);
- if (ret)
+ if (ret) {
+ kfree(p);
+ return ret;
+ }
+
+ p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
+ if (!p->gid_attr_group) {
+ ret = -ENOMEM;
+ goto err_put;
+ }
+
+ p->gid_attr_group->port = p;
+ ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
+ &p->kobj, "gid_attrs");
+ if (ret) {
+ kfree(p->gid_attr_group);
goto err_put;
+ }
ret = sysfs_create_group(&p->kobj, &pma_group);
if (ret)
- goto err_put;
+ goto err_put_gid_attrs;
- ret = sysfs_create_group(&p->kobj, &pma_ext_group);
- if (ret)
- goto err_remove_pma;
+ ret = sysfs_create_group(&p->kobj, &pma_ext_group);
+ if (ret)
+ goto err_remove_pma;
p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
- if (!p->gid_group.attrs)
+ if (!p->gid_group.attrs) {
+ ret = -ENOMEM;
goto err_remove_pma_ext;
+ }
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)
goto err_free_gid;
+ p->gid_attr_group->ndev.name = "ndevs";
+ p->gid_attr_group->ndev.attrs = alloc_group_attrs(show_port_gid_attr_ndev,
+ attr.gid_tbl_len);
+ if (!p->gid_attr_group->ndev.attrs) {
+ ret = -ENOMEM;
+ goto err_remove_gid;
+ }
+
+ ret = sysfs_create_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->ndev);
+ if (ret)
+ goto err_free_gid_ndev;
+
+ p->gid_attr_group->type.name = "types";
+ p->gid_attr_group->type.attrs = alloc_group_attrs(show_port_gid_attr_gid_type,
+ attr.gid_tbl_len);
+ if (!p->gid_attr_group->type.attrs) {
+ ret = -ENOMEM;
+ goto err_remove_gid_ndev;
+ }
+
+ ret = sysfs_create_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->type);
+ if (ret)
+ goto err_free_gid_type;
+
p->pkey_group.name = "pkeys";
p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
attr.pkey_tbl_len);
- if (!p->pkey_group.attrs)
- goto err_remove_gid;
+ if (!p->pkey_group.attrs) {
+ ret = -ENOMEM;
+ goto err_remove_gid_type;
+ }
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
if (ret)
goto err_free_pkey;
- if (port_callback) {
- ret = port_callback(device, port_num, &p->kobj);
- if (ret)
- goto err_remove_pkey;
- }
+ if (port_callback) {
+ ret = port_callback(device, port_num, &p->kobj);
+ if (ret)
+ goto err_remove_pkey;
+ }
list_add_tail(&p->kobj.entry, &device->port_list);
-#ifdef __linux__
- kobject_uevent(&p->kobj, KOBJ_ADD);
-#endif
+
return 0;
err_remove_pkey:
- sysfs_remove_group(&p->kobj, &p->pkey_group);
+ sysfs_remove_group(&p->kobj, &p->pkey_group);
err_free_pkey:
for (i = 0; i < attr.pkey_tbl_len; ++i)
kfree(p->pkey_group.attrs[i]);
kfree(p->pkey_group.attrs);
+ p->pkey_group.attrs = NULL;
+
+err_remove_gid_type:
+ sysfs_remove_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->type);
+
+err_free_gid_type:
+ for (i = 0; i < attr.gid_tbl_len; ++i)
+ kfree(p->gid_attr_group->type.attrs[i]);
+
+ kfree(p->gid_attr_group->type.attrs);
+ p->gid_attr_group->type.attrs = NULL;
+
+err_remove_gid_ndev:
+ sysfs_remove_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->ndev);
+
+err_free_gid_ndev:
+ for (i = 0; i < attr.gid_tbl_len; ++i)
+ kfree(p->gid_attr_group->ndev.attrs[i]);
+
+ kfree(p->gid_attr_group->ndev.attrs);
+ p->gid_attr_group->ndev.attrs = NULL;
err_remove_gid:
sysfs_remove_group(&p->kobj, &p->gid_group);
@@ -630,16 +814,19 @@
kfree(p->gid_group.attrs[i]);
kfree(p->gid_group.attrs);
+ p->gid_group.attrs = NULL;
err_remove_pma_ext:
- sysfs_remove_group(&p->kobj, &pma_ext_group);
+ sysfs_remove_group(&p->kobj, &pma_ext_group);
err_remove_pma:
sysfs_remove_group(&p->kobj, &pma_group);
+err_put_gid_attrs:
+ kobject_put(&p->gid_attr_group->kobj);
+
err_put:
- kobject_put(device->ports_parent);
- kfree(p);
+ kobject_put(&p->kobj);
return ret;
}
@@ -653,7 +840,6 @@
case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
- case RDMA_NODE_MIC: return sprintf(buf, "%d: MIC\n", dev->node_type);
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
}
}
@@ -675,7 +861,7 @@
be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]),
be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3]));
}
-
+
static ssize_t show_node_guid(struct device *device,
struct device_attribute *attr, char *buf)
{
@@ -715,83 +901,21 @@
return count;
}
-static ssize_t show_cmd_perf(struct device *device,
- struct device_attribute *attr, char *buf)
-{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
-
- return sprintf(buf, "%d\n", dev->cmd_perf);
-}
-
-static ssize_t set_cmd_perf(struct device *device,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
- u32 val;
-
- if (sscanf(buf, "0x%x", &val) != 1)
- return -EINVAL;
-
- dev->cmd_perf = val;
-
- return count;
-}
-
-static ssize_t show_cmd_avg(struct device *device,
- struct device_attribute *attr, char *buf)
-{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
-
- return sprintf(buf, "%llu\n", (unsigned long long)dev->cmd_avg);
-}
-
-static ssize_t set_cmd_avg(struct device *device,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
-
- spin_lock(&dev->cmd_perf_lock);
- dev->cmd_avg = 0;
- dev->cmd_n = 0;
- spin_unlock(&dev->cmd_perf_lock);
-
- return count;
-}
-
-static ssize_t show_cmd_n(struct device *device,
- struct device_attribute *attr, char *buf)
-{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
-
- return sprintf(buf, "%d\n", dev->cmd_n);
-}
-
static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
-static DEVICE_ATTR(cmd_perf, S_IRUGO | S_IWUSR, show_cmd_perf, set_cmd_perf);
-static DEVICE_ATTR(cmd_avg, S_IRUGO | S_IWUSR, show_cmd_avg, set_cmd_avg);
-static DEVICE_ATTR(cmd_n, S_IRUGO, show_cmd_n, NULL);
static struct device_attribute *ib_class_attributes[] = {
&dev_attr_node_type,
&dev_attr_sys_image_guid,
&dev_attr_node_guid,
- &dev_attr_node_desc,
- &dev_attr_cmd_perf,
- &dev_attr_cmd_avg,
- &dev_attr_cmd_n,
+ &dev_attr_node_desc
};
static struct class ib_class = {
.name = "infiniband",
.dev_release = ib_device_release,
-#ifdef __linux__
- .dev_uevent = ib_device_uevent,
-#endif
};
/* Show a given an attribute in the statistics group */
@@ -908,6 +1032,28 @@
.attrs = iw_proto_stats_attrs,
};
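+/* Remove the per-port sysfs groups and drop their kobject references. */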
+static void free_port_list_attributes(struct ib_device *device)
+{
+ struct kobject *p, *t;
+
+ list_for_each_entry_safe(p, t, &device->port_list, entry) {
+ struct ib_port *port = container_of(p, struct ib_port, kobj);
+ list_del(&p->entry);
+ sysfs_remove_group(p, &pma_group);
+ sysfs_remove_group(p, &pma_ext_group);
+ sysfs_remove_group(p, &port->pkey_group);
+ sysfs_remove_group(p, &port->gid_group);
+ sysfs_remove_group(&port->gid_attr_group->kobj,
+ &port->gid_attr_group->ndev);
+ sysfs_remove_group(&port->gid_attr_group->kobj,
+ &port->gid_attr_group->type);
+ kobject_put(&port->gid_attr_group->kobj);
+ kobject_put(p);
+ }
+
+ kobject_put(device->ports_parent);
+}
+
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
@@ -918,8 +1064,8 @@
class_dev->class = &ib_class;
class_dev->parent = device->dma_device;
- dev_set_name(class_dev, device->name);
- dev_set_drvdata(class_dev, device);
+ dev_set_name(class_dev, "%s", device->name);
+ dev_set_drvdata(class_dev, device);
INIT_LIST_HEAD(&device->port_list);
@@ -933,8 +1079,9 @@
goto err_unregister;
}
- device->ports_parent = kobject_create_and_add("ports",&class_dev->kobj);
- if (!device->ports_parent) {
+ device->ports_parent = kobject_create_and_add("ports",
+ &class_dev->kobj);
+ if (!device->ports_parent) {
ret = -ENOMEM;
goto err_put;
}
@@ -960,21 +1107,7 @@
return 0;
err_put:
- {
- struct kobject *p, *t;
- struct ib_port *port;
-
- list_for_each_entry_safe(p, t, &device->port_list, entry) {
- list_del(&p->entry);
- port = container_of(p, struct ib_port, kobj);
- sysfs_remove_group(p, &pma_group);
- sysfs_remove_group(p, &port->pkey_group);
- sysfs_remove_group(p, &port->gid_group);
- kobject_put(p);
- }
- }
-
- kobject_put(&class_dev->kobj);
+ free_port_list_attributes(device);
err_unregister:
@@ -990,28 +1123,18 @@
void ib_device_unregister_sysfs(struct ib_device *device)
{
+ /* Hold kobject until ib_dealloc_device() */
+ struct kobject *kobj_dev = kobject_get(&device->dev.kobj);
int i;
- struct kobject *p, *t;
- struct ib_port *port;
- struct device *class_dev = &device->dev;
- /* Hold kobject until ib_dealloc_device() */
- kobject_get(&device->dev.kobj);
+ if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats)
+ sysfs_remove_group(kobj_dev, &iw_stats_group);
- for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
- device_remove_file(class_dev, ib_class_attributes[i]);
- }
+ free_port_list_attributes(device);
- list_for_each_entry_safe(p, t, &device->port_list, entry) {
- list_del(&p->entry);
- port = container_of(p, struct ib_port, kobj);
- sysfs_remove_group(p, &pma_group);
- sysfs_remove_group(p, &port->pkey_group);
- sysfs_remove_group(p, &port->gid_group);
- kobject_put(p);
- }
+ for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
+ device_remove_file(&device->dev, ib_class_attributes[i]);
- kobject_put(device->ports_parent);
device_unregister(&device->dev);
}
Index: sys/ofed/drivers/infiniband/core/ucm.c
===================================================================
--- sys/ofed/drivers/infiniband/core/ucm.c
+++ sys/ofed/drivers/infiniband/core/ucm.c
@@ -32,7 +32,7 @@
*/
#include <linux/completion.h>
-#include <linux/fs.h>
+#include <linux/fs.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -51,9 +51,7 @@
#include <rdma/ib_user_cm.h>
#include <rdma/ib_marshall.h>
-MODULE_AUTHOR("Libor Michalek");
-MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
-MODULE_LICENSE("Dual BSD/GPL");
+/* InfiniBand userspace Connection Manager access */
struct ib_ucm_device {
int devnum;
@@ -120,6 +118,9 @@
static DEFINE_IDR(ctx_id_table);
static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES);
+static dev_t overflow_maj;
+static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
+
static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
{
struct ib_ucm_context *ctx;
@@ -1214,17 +1215,17 @@
ucm_dev = container_of(dev, struct ib_ucm_device, dev);
cdev_del(&ucm_dev->cdev);
if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
- clear_bit(ucm_dev->devnum, dev_map);
+ clear_bit(ucm_dev->devnum, dev_map);
else
- clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map);
+ clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map);
kfree(ucm_dev);
}
static const struct file_operations ucm_fops = {
- .owner = THIS_MODULE,
- .open = ib_ucm_open,
+ .owner = THIS_MODULE,
+ .open = ib_ucm_open,
.release = ib_ucm_close,
- .write = ib_ucm_write,
+ .write = ib_ucm_write,
.poll = ib_ucm_poll,
.llseek = no_llseek,
};
@@ -1239,8 +1240,6 @@
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
-static dev_t overflow_maj;
-static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
static int find_overflow_devnum(void)
{
int ret;
@@ -1281,7 +1280,7 @@
if (devnum >= IB_UCM_MAX_DEVICES) {
devnum = find_overflow_devnum();
if (devnum < 0)
- goto err;
+ goto err;
ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES;
base = devnum + overflow_maj;
@@ -1337,7 +1336,7 @@
static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
+ return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
}
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
Index: sys/ofed/drivers/infiniband/core/ucma.c
===================================================================
--- sys/ofed/drivers/infiniband/core/ucma.c
+++ sys/ofed/drivers/infiniband/core/ucma.c
@@ -48,10 +48,10 @@
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib.h>
-MODULE_AUTHOR("Sean Hefty");
-MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
-MODULE_LICENSE("Dual BSD/GPL");
+/* RDMA Userspace Connection Manager Access */
static unsigned int max_backlog = 1024;
@@ -61,6 +61,7 @@
struct list_head ctx_list;
struct list_head event_list;
wait_queue_head_t poll_wait;
+ struct workqueue_struct *close_wq;
};
struct ucma_context {
@@ -76,6 +77,9 @@
struct list_head list;
struct list_head mc_list;
+ int closing;
+ int destroying;
+ struct work_struct close_work;
};
struct ucma_multicast {
@@ -94,6 +98,7 @@
struct list_head list;
struct rdma_cm_id *cm_id;
struct rdma_ucm_event_resp resp;
+ struct work_struct close_work;
};
static DEFINE_MUTEX(mut);
@@ -119,8 +124,12 @@
mutex_lock(&mut);
ctx = _ucma_find_context(id, file);
- if (!IS_ERR(ctx))
- atomic_inc(&ctx->ref);
+ if (!IS_ERR(ctx)) {
+ if (ctx->closing)
+ ctx = ERR_PTR(-EIO);
+ else
+ atomic_inc(&ctx->ref);
+ }
mutex_unlock(&mut);
return ctx;
}
@@ -131,6 +140,34 @@
complete(&ctx->comp);
}
+static void ucma_close_event_id(struct work_struct *work)
+{
+ struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
+
+ rdma_destroy_id(uevent_close->cm_id);
+ kfree(uevent_close);
+}
+
+static void ucma_close_id(struct work_struct *work)
+{
+ struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);
+
+ /* Fence to ensure that ctx->closing has been seen by every
+ * ucma_get_ctx caller that was running concurrently.
+ */
+ mutex_lock(&mut);
+ mutex_unlock(&mut);
+
+ /* Once all in-flight tasks are finished, we close all underlying
+ * resources. The context is still alive until it is explicitly
+ * destroyed by its creator.
+ */
+ ucma_put_ctx(ctx);
+ wait_for_completion(&ctx->comp);
+ /* No new events will be generated after destroying the id. */
+ rdma_destroy_id(ctx->cm_id);
+}
+
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
struct ucma_context *ctx;
@@ -140,6 +177,7 @@
if (!ctx)
return NULL;
+ INIT_WORK(&ctx->close_work, ucma_close_id);
atomic_set(&ctx->ref, 1);
init_completion(&ctx->comp);
INIT_LIST_HEAD(&ctx->mc_list);
@@ -245,6 +283,42 @@
}
}
+/* Called with file->mut locked for the relevant context. */
+static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
+{
+ struct ucma_context *ctx = cm_id->context;
+ struct ucma_event *con_req_eve;
+ int event_found = 0;
+
+ if (ctx->destroying)
+ return;
+
+ /* Only when the context owns the cm_id (ctx->cm_id == cm_id) can it be
+ * queued to be closed. Otherwise the cm_id is an in-flight one that is
+ * still on this context's event list, waiting to be detached and
+ * reattached to its new context by ucma_get_event; that case is
+ * handled separately below.
+ */
+ if (ctx->cm_id == cm_id) {
+ ctx->closing = 1;
+ queue_work(ctx->file->close_wq, &ctx->close_work);
+ return;
+ }
+
+ list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
+ if (con_req_eve->cm_id == cm_id &&
+ con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
+ list_del(&con_req_eve->list);
+ INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
+ queue_work(ctx->file->close_wq, &con_req_eve->close_work);
+ event_found = 1;
+ break;
+ }
+ }
+ if (!event_found)
+ printk(KERN_ERR "ucma_removal_event_handler: warning: connect request event wasn't found\n");
+}
+
static int ucma_event_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
@@ -274,13 +348,18 @@
goto out;
}
ctx->backlog--;
- } else if (!ctx->uid) {
+ } else if (!ctx->uid || ctx->cm_id != cm_id) {
/*
* We ignore events for new connections until userspace has set
* their context. This can only happen if an error occurs on a
* new connection before the user accepts it. This is okay,
- * since the accept will just fail later.
+ * since the accept will just fail later. However, we do need
+ * to release the underlying HW resources in case of a device
+ * removal event.
*/
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
+ ucma_removal_event_handler(cm_id);
+
kfree(uevent);
goto out;
}
@@ -289,6 +368,8 @@
wake_up_interruptible(&ctx->file->poll_wait);
if (ctx->file->filp)
selwakeup(&ctx->file->filp->f_selinfo);
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
+ ucma_removal_event_handler(cm_id);
out:
mutex_unlock(&ctx->file->mut);
return ret;
@@ -372,7 +453,7 @@
}
static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
- int in_len, int out_len)
+ int in_len, int out_len)
{
struct rdma_ucm_create_id cmd;
struct rdma_ucm_create_id_resp resp;
@@ -402,6 +483,7 @@
ret = PTR_ERR(ctx->cm_id);
goto err1;
}
+
ctx->cm_id->ucontext = ctx;
resp.id = ctx->id;
@@ -449,9 +531,15 @@
}
/*
- * We cannot hold file->mut when calling rdma_destroy_id() or we can
- * deadlock. We also acquire file->mut in ucma_event_handler(), and
- * rdma_destroy_id() will wait until all callbacks have completed.
+ * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
+ * this point, no new events will be reported from the hardware. However, we
+ * still need to clean up the UCMA context for this ID. Specifically, there
+ * might be events that have not yet been consumed by the user space software.
+ * These might include pending connect requests which we have not completed
+ * processing. We cannot call rdma_destroy_id while holding the lock of the
+ * context (file->mut), as it might cause a deadlock. We therefore extract all
+ * relevant events from the context pending events list while holding the
+ * mutex. After that we release them as needed.
*/
static int ucma_free_ctx(struct ucma_context *ctx)
{
@@ -459,8 +547,6 @@
struct ucma_event *uevent, *tmp;
LIST_HEAD(list);
- /* No new events will be generated after destroying the id. */
- rdma_destroy_id(ctx->cm_id);
ucma_cleanup_multicast(ctx);
@@ -508,10 +594,20 @@
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ucma_put_ctx(ctx);
- wait_for_completion(&ctx->comp);
- resp.events_reported = ucma_free_ctx(ctx);
+ mutex_lock(&ctx->file->mut);
+ ctx->destroying = 1;
+ mutex_unlock(&ctx->file->mut);
+
+ flush_workqueue(ctx->file->close_wq);
+ /* At this point it's guaranteed that there is no inflight
+ * closing task */
+ if (!ctx->closing) {
+ ucma_put_ctx(ctx);
+ wait_for_completion(&ctx->comp);
+ rdma_destroy_id(ctx->cm_id);
+ }
+ resp.events_reported = ucma_free_ctx(ctx);
if (copy_to_user((void __user *)(unsigned long)cmd.response,
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -519,10 +615,10 @@
return ret;
}
-static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,
+static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
- struct rdma_ucm_bind_addr cmd;
+ struct rdma_ucm_bind_ip cmd;
struct ucma_context *ctx;
int ret;
@@ -538,24 +634,75 @@
return ret;
}
+static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_bind cmd;
+ struct sockaddr *addr;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ addr = (struct sockaddr *) &cmd.addr;
+ if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr)))
+ return -EINVAL;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ret = rdma_bind_addr(ctx->cm_id, addr);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_resolve_ip(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_resolve_ip cmd;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
+ (struct sockaddr *) &cmd.dst_addr,
+ cmd.timeout_ms);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
static ssize_t ucma_resolve_addr(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len)
{
struct rdma_ucm_resolve_addr cmd;
+ struct sockaddr *src, *dst;
struct ucma_context *ctx;
int ret;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ src = (struct sockaddr *) &cmd.src_addr;
+ dst = (struct sockaddr *) &cmd.dst_addr;
+ if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) ||
+ !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst)))
+ return -EINVAL;
+
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
- (struct sockaddr *) &cmd.dst_addr,
- cmd.timeout_ms);
+ ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms);
ucma_put_ctx(ctx);
return ret;
}
@@ -648,7 +795,7 @@
const char __user *inbuf,
int in_len, int out_len)
{
- struct rdma_ucm_query_route cmd;
+ struct rdma_ucm_query cmd;
struct rdma_ucm_query_route_resp resp;
struct ucma_context *ctx;
struct sockaddr *addr;
@@ -708,7 +855,288 @@
return ret;
}
-static void ucma_copy_conn_param(struct rdma_conn_param *dst,
+static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
+ struct rdma_ucm_query_addr_resp *resp)
+{
+ if (!cm_id->device)
+ return;
+
+ resp->node_guid = (__force __u64) cm_id->device->node_guid;
+ resp->port_num = cm_id->port_num;
+ resp->pkey = (__force __u16) cpu_to_be16(
+ ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
+}
+
+static ssize_t ucma_query_addr(struct ucma_context *ctx,
+ void __user *response, int out_len)
+{
+ struct rdma_ucm_query_addr_resp resp;
+ struct sockaddr *addr;
+ int ret = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ memset(&resp, 0, sizeof resp);
+
+ addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
+ resp.src_size = rdma_addr_size(addr);
+ memcpy(&resp.src_addr, addr, resp.src_size);
+
+ addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
+ resp.dst_size = rdma_addr_size(addr);
+ memcpy(&resp.dst_addr, addr, resp.dst_size);
+
+ ucma_query_device_addr(ctx->cm_id, &resp);
+
+ if (copy_to_user(response, &resp, sizeof(resp)))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static ssize_t ucma_query_path(struct ucma_context *ctx,
+ void __user *response, int out_len)
+{
+ struct rdma_ucm_query_path_resp *resp;
+ int i, ret = 0;
+
+ if (out_len < sizeof(*resp))
+ return -ENOSPC;
+
+ resp = kzalloc(out_len, GFP_KERNEL);
+ if (!resp)
+ return -ENOMEM;
+
+ resp->num_paths = ctx->cm_id->route.num_paths;
+ for (i = 0, out_len -= sizeof(*resp);
+ i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
+ i++, out_len -= sizeof(struct ib_path_rec_data)) {
+
+ resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
+ IB_PATH_BIDIRECTIONAL;
+ ib_sa_pack_path(&ctx->cm_id->route.path_rec[i],
+ &resp->path_data[i].path_rec);
+ }
+
+ if (copy_to_user(response, resp,
+ sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
+ ret = -EFAULT;
+
+ kfree(resp);
+ return ret;
+}
+
+static ssize_t ucma_query_gid(struct ucma_context *ctx,
+ void __user *response, int out_len)
+{
+ struct rdma_ucm_query_addr_resp resp;
+ struct sockaddr_ib *addr;
+ int ret = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ memset(&resp, 0, sizeof resp);
+
+ ucma_query_device_addr(ctx->cm_id, &resp);
+
+ addr = (struct sockaddr_ib *) &resp.src_addr;
+ resp.src_size = sizeof(*addr);
+ if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
+ memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
+ } else {
+ addr->sib_family = AF_IB;
+ addr->sib_pkey = (__force __be16) resp.pkey;
+ rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
+ (union ib_gid *) &addr->sib_addr);
+ addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
+ &ctx->cm_id->route.addr.src_addr);
+ }
+
+ addr = (struct sockaddr_ib *) &resp.dst_addr;
+ resp.dst_size = sizeof(*addr);
+ if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
+ memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
+ } else {
+ addr->sib_family = AF_IB;
+ addr->sib_pkey = (__force __be16) resp.pkey;
+ rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
+ (union ib_gid *) &addr->sib_addr);
+ addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
+ &ctx->cm_id->route.addr.dst_addr);
+ }
+
+ if (copy_to_user(response, &resp, sizeof(resp)))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static ssize_t ucma_query(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_query cmd;
+ struct ucma_context *ctx;
+ void __user *response;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ response = (void __user *)(unsigned long) cmd.response;
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ switch (cmd.option) {
+ case RDMA_USER_CM_QUERY_ADDR:
+ ret = ucma_query_addr(ctx, response, out_len);
+ break;
+ case RDMA_USER_CM_QUERY_PATH:
+ ret = ucma_query_path(ctx, response, out_len);
+ break;
+ case RDMA_USER_CM_QUERY_GID:
+ ret = ucma_query_gid(ctx, response, out_len);
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ ucma_put_ctx(ctx);
+
+ return ret;
+}
+
+static void ucma_copy_conn_param(struct rdma_cm_id *id,
+ struct rdma_conn_param *dst,
struct rdma_ucm_conn_param *src)
{
dst->private_data = src->private_data;
@@ -720,6 +1148,7 @@
dst->rnr_retry_count = src->rnr_retry_count;
dst->srq = src->srq;
dst->qp_num = src->qp_num;
+ dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}
static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
@@ -740,7 +1169,7 @@
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ucma_copy_conn_param(&conn_param, &cmd.conn_param);
+ ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
ret = rdma_connect(ctx->cm_id, &conn_param);
ucma_put_ctx(ctx);
return ret;
@@ -783,7 +1212,7 @@
return PTR_ERR(ctx);
if (cmd.conn_param.valid) {
- ucma_copy_conn_param(&conn_param, &cmd.conn_param);
+ ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
mutex_lock(&file->mut);
ret = rdma_accept(ctx->cm_id, &conn_param);
if (!ret)
@@ -924,6 +1353,8 @@
if (!optlen)
return -EINVAL;
+ memset(&sa_path, 0, sizeof(sa_path));
+
ib_sa_unpack_path(path_data->path_rec, &sa_path);
ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
if (ret)
@@ -937,7 +1368,7 @@
static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
void *optval, size_t optlen)
{
- int ret = 0;
+ int ret = -ENOSYS;
switch (optname) {
case RDMA_OPTION_IB_PATH:
@@ -1035,23 +1466,23 @@
return ret;
}
-static ssize_t ucma_join_multicast(struct ucma_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
+static ssize_t ucma_process_join(struct ucma_file *file,
+ struct rdma_ucm_join_mcast *cmd, int out_len)
{
- struct rdma_ucm_join_mcast cmd;
struct rdma_ucm_create_id_resp resp;
struct ucma_context *ctx;
struct ucma_multicast *mc;
+ struct sockaddr *addr;
int ret;
if (out_len < sizeof(resp))
return -ENOSPC;
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
+ addr = (struct sockaddr *) &cmd->addr;
+ if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
+ return -EINVAL;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx(file, cmd->id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1062,14 +1493,14 @@
goto err1;
}
- mc->uid = cmd.uid;
- memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr);
+ mc->uid = cmd->uid;
+ memcpy(&mc->addr, addr, cmd->addr_size);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);
if (ret)
goto err2;
resp.id = mc->id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user((void __user *)(unsigned long) cmd->response,
&resp, sizeof(resp))) {
ret = -EFAULT;
goto err3;
@@ -1094,6 +1525,38 @@
return ret;
}
+static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_join_ip_mcast cmd;
+ struct rdma_ucm_join_mcast join_cmd;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ join_cmd.response = cmd.response;
+ join_cmd.uid = cmd.uid;
+ join_cmd.id = cmd.id;
+ join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
+ join_cmd.reserved = 0;
+ memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
+
+ return ucma_process_join(file, &join_cmd, out_len);
+}
+
+static ssize_t ucma_join_multicast(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_join_mcast cmd;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ return ucma_process_join(file, &cmd, out_len);
+}
+
static ssize_t ucma_leave_multicast(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len)
@@ -1236,25 +1699,29 @@
static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len) = {
- [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id,
- [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id,
- [RDMA_USER_CM_CMD_BIND_ADDR] = ucma_bind_addr,
- [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr,
- [RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route,
- [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route,
- [RDMA_USER_CM_CMD_CONNECT] = ucma_connect,
- [RDMA_USER_CM_CMD_LISTEN] = ucma_listen,
- [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept,
- [RDMA_USER_CM_CMD_REJECT] = ucma_reject,
- [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect,
- [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
- [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event,
- [RDMA_USER_CM_CMD_GET_OPTION] = NULL,
- [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option,
- [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
- [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
- [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
- [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id
+ [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id,
+ [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id,
+ [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip,
+ [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip,
+ [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
+ [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route,
+ [RDMA_USER_CM_CMD_CONNECT] = ucma_connect,
+ [RDMA_USER_CM_CMD_LISTEN] = ucma_listen,
+ [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept,
+ [RDMA_USER_CM_CMD_REJECT] = ucma_reject,
+ [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect,
+ [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
+ [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event,
+ [RDMA_USER_CM_CMD_GET_OPTION] = NULL,
+ [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option,
+ [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
+ [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
+ [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
+ [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id,
+ [RDMA_USER_CM_CMD_QUERY] = ucma_query,
+ [RDMA_USER_CM_CMD_BIND] = ucma_bind,
+ [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr,
+ [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast
};
static ssize_t ucma_write(struct file *filp, const char __user *buf,
@@ -1319,6 +1786,7 @@
INIT_LIST_HEAD(&file->ctx_list);
init_waitqueue_head(&file->poll_wait);
mutex_init(&file->mut);
+ file->close_wq = create_singlethread_workqueue("ucma_close_id");
filp->private_data = file;
file->filp = filp;
@@ -1333,16 +1801,28 @@
mutex_lock(&file->mut);
list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
+ ctx->destroying = 1;
mutex_unlock(&file->mut);
mutex_lock(&mut);
idr_remove(&ctx_idr, ctx->id);
mutex_unlock(&mut);
+ flush_workqueue(file->close_wq);
+	/* At this point, with ctx marked as destroying and the workqueue
+	 * flushed, we are safe from any in-flight handlers that might
+	 * queue another closing task.
+	 */
+	if (!ctx->closing)
+	/* rdma_destroy_id ensures that no event handlers are
+	 * in flight for that id before releasing it.
+	 */
+ rdma_destroy_id(ctx->cm_id);
ucma_free_ctx(ctx);
mutex_lock(&file->mut);
}
mutex_unlock(&file->mut);
+ destroy_workqueue(file->close_wq);
kfree(file);
return 0;
}
@@ -1371,11 +1851,11 @@
};
static struct miscdevice ucma_misc = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = "rdma_cm",
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "rdma_cm",
.nodename = "infiniband/rdma_cm",
.mode = 0666,
- .fops = &ucma_fops,
+ .fops = &ucma_fops,
};
static ssize_t show_abi_version(struct device *dev,
@@ -1399,7 +1879,6 @@
printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n");
goto err1;
}
-
return 0;
err1:
misc_deregister(&ucma_misc);
Index: sys/ofed/drivers/infiniband/core/ud_header.c
===================================================================
--- sys/ofed/drivers/infiniband/core/ud_header.c
+++ sys/ofed/drivers/infiniband/core/ud_header.c
@@ -35,6 +35,9 @@
#include <linux/string.h>
#include <linux/module.h>
#include <linux/if_ether.h>
+#include <netinet/ip.h>
+
+#include <machine/in_cksum.h>
#include <rdma/ib_pack.h>
@@ -116,6 +119,68 @@
.size_bits = 16 }
};
+static const struct ib_field ip4_table[] = {
+ { STRUCT_FIELD(ip4, ver_len),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { STRUCT_FIELD(ip4, tos),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { STRUCT_FIELD(ip4, tot_len),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(ip4, id),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(ip4, frag_off),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(ip4, ttl),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { STRUCT_FIELD(ip4, protocol),
+ .offset_words = 2,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { STRUCT_FIELD(ip4, check),
+ .offset_words = 2,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(ip4, saddr),
+ .offset_words = 3,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { STRUCT_FIELD(ip4, daddr),
+ .offset_words = 4,
+ .offset_bits = 0,
+ .size_bits = 32 }
+};
+
+static const struct ib_field udp_table[] = {
+ { STRUCT_FIELD(udp, sport),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(udp, dport),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(udp, length),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(udp, csum),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 16 }
+};
+
static const struct ib_field grh_table[] = {
{ STRUCT_FIELD(grh, ip_version),
.offset_words = 0,
@@ -213,6 +278,26 @@
.size_bits = 24 }
};
+u16 ib_ud_ip4_csum(struct ib_ud_header *header)
+{
+ struct ip iph;
+
+ iph.ip_hl = 5;
+ iph.ip_v = 4;
+ iph.ip_tos = header->ip4.tos;
+ iph.ip_len = header->ip4.tot_len;
+ iph.ip_id = header->ip4.id;
+ iph.ip_off = header->ip4.frag_off;
+ iph.ip_ttl = header->ip4.ttl;
+ iph.ip_p = header->ip4.protocol;
+ iph.ip_sum = 0;
+ iph.ip_src.s_addr = header->ip4.saddr;
+ iph.ip_dst.s_addr = header->ip4.daddr;
+
+ return in_cksum_hdr(&iph);
+}
+EXPORT_SYMBOL(ib_ud_ip4_csum);
+
/**
* ib_ud_header_init - Initialize UD header structure
* @payload_bytes:Length of packet payload
@@ -220,21 +305,37 @@
* @eth_present: specify if Eth header is present
* @vlan_present: packet is tagged vlan
* @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @ip_version:IP flag (if non-zero, IP header, V4 or V6, will be included)
+ * @udp_present:UDP flag (if non-zero, UDP header will be included)
* @immediate_present: specify if immediate data is present
* @header:Structure to initialize
*/
-void ib_ud_header_init(int payload_bytes,
- int lrh_present,
- int eth_present,
- int vlan_present,
- int grh_present,
- int immediate_present,
- struct ib_ud_header *header)
+int ib_ud_header_init(int payload_bytes,
+ int lrh_present,
+ int eth_present,
+ int vlan_present,
+ int grh_present,
+ int ip_version,
+ int udp_present,
+ int immediate_present,
+ struct ib_ud_header *header)
{
+ int ipv4_present;
+ int ipv6_present;
+
+ grh_present = grh_present && !ip_version;
memset(header, 0, sizeof *header);
+ /*
+ * UDP header without IP header doesn't make sense
+ */
+ if (udp_present && ip_version != 4 && ip_version != 6)
+ return -EINVAL;
+
+ ipv4_present = (ip_version == 4);
+ ipv6_present = (ip_version == 6);
if (lrh_present) {
- u16 packet_length = 0;
+ u16 packet_length;
header->lrh.link_version = 0;
header->lrh.link_next_header =
@@ -250,18 +351,37 @@
}
if (vlan_present)
- header->eth.type = cpu_to_be16(ETH_P_8021Q);
+ header->eth.type = cpu_to_be16(ETH_P_8021Q);
+
+ if (ipv6_present || grh_present) {
+ header->grh.ip_version = 6;
+ header->grh.payload_length =
+ cpu_to_be16((IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4 + /* ICRC */
+ 3) & ~3); /* round up */
+ }
+
+ if (ipv4_present) {
+ int udp_bytes = udp_present ? IB_UDP_BYTES : 0;
- if (grh_present) {
- header->grh.ip_version = 6;
- header->grh.payload_length =
- cpu_to_be16((IB_BTH_BYTES +
+ header->ip4.ver_len = 0x45; /* version 4, 5 words */
+ header->ip4.tot_len =
+ cpu_to_be16(IB_IP4_BYTES +
+ udp_bytes +
+ IB_BTH_BYTES +
IB_DETH_BYTES +
payload_bytes +
- 4 + /* ICRC */
- 3) & ~3); /* round up */
- header->grh.next_header = 0x1b;
+ 4); /* ICRC */
}
+ if (udp_present && ip_version)
+ header->udp.length =
+ cpu_to_be16(IB_UDP_BYTES +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4); /* ICRC */
if (immediate_present)
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
@@ -273,8 +393,11 @@
header->lrh_present = lrh_present;
header->eth_present = eth_present;
header->vlan_present = vlan_present;
- header->grh_present = grh_present;
+ header->grh_present = grh_present || ipv6_present;
+ header->ipv4_present = ipv4_present;
+ header->udp_present = udp_present;
header->immediate_present = immediate_present;
+ return 0;
}
EXPORT_SYMBOL(ib_ud_header_init);
@@ -311,6 +434,16 @@
&header->grh, buf + len);
len += IB_GRH_BYTES;
}
+ if (header->ipv4_present) {
+ ib_pack(ip4_table, ARRAY_SIZE(ip4_table),
+ &header->ip4, buf + len);
+ len += IB_IP4_BYTES;
+ }
+ if (header->udp_present) {
+ ib_pack(udp_table, ARRAY_SIZE(udp_table),
+ &header->udp, buf + len);
+ len += IB_UDP_BYTES;
+ }
ib_pack(bth_table, ARRAY_SIZE(bth_table),
&header->bth, buf + len);
Index: sys/ofed/drivers/infiniband/core/umem.c
===================================================================
--- sys/ofed/drivers/infiniband/core/umem.c
+++ sys/ofed/drivers/infiniband/core/umem.c
@@ -39,7 +39,7 @@
#include <linux/sched.h>
#include <linux/dma-attrs.h>
#include <linux/slab.h>
-#include <linux/module.h>
+
#include <sys/priv.h>
#include <sys/resourcevar.h>
#include <vm/vm_pageout.h>
@@ -48,9 +48,6 @@
#define IB_UMEM_MAX_PAGE_CHUNK (PAGE_SIZE / sizeof (struct page *))
-static int allow_weak_ordering;
-module_param_named(weak_ordering, allow_weak_ordering, int, 0444);
-MODULE_PARM_DESC(weak_ordering, "Allow weak ordering for data registered memory");
static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
struct ib_umem *umem, unsigned long addr,
@@ -65,15 +62,17 @@
invalidation_ctx = kzalloc(sizeof(*invalidation_ctx), GFP_KERNEL);
if (!invalidation_ctx) {
ret = -ENOMEM;
- goto out;
+ goto end;
}
umem->invalidation_ctx = invalidation_ctx;
invalidation_ctx->umem = umem;
mutex_lock(&ib_peer_mem->lock);
- invalidation_ctx->context_ticket =
- ib_peer_insert_context(ib_peer_mem, invalidation_ctx);
+ ret = ib_peer_insert_context(ib_peer_mem, invalidation_ctx,
+ &invalidation_ctx->context_ticket);
/* unlock before calling get pages to prevent a dead-lock from the callback */
mutex_unlock(&ib_peer_mem->lock);
+ if (ret)
+ goto end;
}
ret = peer_mem->get_pages(addr, umem->length, umem->writable, 1,
@@ -121,15 +120,17 @@
if (invalidation_ctx) {
ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
mutex_unlock(&umem->ib_peer_mem->lock);
- kfree(invalidation_ctx);
}
+end:
+ if (invalidation_ctx)
+ kfree(invalidation_ctx);
+
ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context,
umem->peer_mem_srcu_key);
kfree(umem);
return ERR_PTR(ret);
}
-
static void peer_umem_release(struct ib_umem *umem)
{
struct ib_peer_memory_client *ib_peer_mem = umem->ib_peer_mem;
@@ -195,23 +196,26 @@
object = NULL;
if (umem->nmap > 0)
ib_dma_unmap_sg(dev, umem->sg_head.sgl,
- umem->nmap,
- DMA_BIDIRECTIONAL);
+ umem->nmap,
+ DMA_BIDIRECTIONAL);
+
for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
page = sg_page(sg);
- if (umem->writable && dirty) {
- if (object && object != page->object)
- VM_OBJECT_WUNLOCK(object);
- if (object != page->object) {
- object = page->object;
- VM_OBJECT_WLOCK(object);
- }
- vm_page_dirty(page);
+ if (umem->writable && dirty) {
+ if (object && object != page->object)
+ VM_OBJECT_WUNLOCK(object);
+ if (object != page->object) {
+ object = page->object;
+ VM_OBJECT_WLOCK(object);
}
+ vm_page_dirty(page);
}
+ }
+
sg_free_table(&umem->sg_head);
if (object)
VM_OBJECT_WUNLOCK(object);
+ return;
}
@@ -229,8 +233,13 @@
return;
}
EXPORT_SYMBOL(ib_umem_activate_invalidation_notifier);
+
/**
* ib_umem_get - Pin and DMA map userspace memory.
+ *
+ * If access flags indicate ODP memory, avoid pinning. Instead, store
+ * the mm for future page fault handling in conjunction with MMU notifiers.
+ *
* @context: userspace context to pin memory for
* @addr: userspace virtual address to start at
* @size: length of region to pin
@@ -241,13 +250,12 @@
size_t size, int access, int dmasync,
int invalidation_supported)
{
-
struct ib_umem *umem;
- struct proc *proc;
+ struct proc *proc;
pmap_t pmap;
- vm_offset_t end, last, start;
- vm_size_t npages;
- int error;
+ vm_offset_t end, last, start;
+ vm_size_t npages;
+ int error;
int ret;
int ents;
int i;
@@ -259,6 +267,9 @@
if (error)
return ERR_PTR(-error);
+ if (!size)
+ return ERR_PTR(-EINVAL);
+
last = addr + size;
start = addr & PAGE_MASK; /* Use the linux PAGE_MASK definition. */
end = roundup2(last, PAGE_SIZE); /* Use PAGE_MASK safe operation. */
@@ -267,26 +278,35 @@
npages = atop(end - start);
if (npages > vm_page_max_wired)
return ERR_PTR(-ENOMEM);
+
+ /*
+ * If the combination of the addr and size requested for this memory
+ * region causes an integer overflow, return error.
+ */
+ if (((addr + size) < addr) ||
+ PAGE_ALIGN(addr + size) < (addr + size))
+ return ERR_PTR(-EINVAL);
+
umem = kzalloc(sizeof *umem, GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
proc = curthread->td_proc;
PROC_LOCK(proc);
if (ptoa(npages +
- pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))) >
- lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
- PROC_UNLOCK(proc);
- kfree(umem);
- return ERR_PTR(-ENOMEM);
+ pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))) >
+ lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
+ PROC_UNLOCK(proc);
+ kfree(umem);
+ return ERR_PTR(-ENOMEM);
}
- PROC_UNLOCK(proc);
+ PROC_UNLOCK(proc);
if (npages + vm_cnt.v_wire_count > vm_page_max_wired) {
kfree(umem);
return ERR_PTR(-EAGAIN);
}
error = vm_map_wire(&proc->p_vmspace->vm_map, start, end,
- VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES |
- (umem->writable ? VM_MAP_WIRE_WRITE : 0));
+ VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES |
+ (umem->writable ? VM_MAP_WIRE_WRITE : 0));
if (error != KERN_SUCCESS) {
kfree(umem);
return ERR_PTR(-ENOMEM);
@@ -294,38 +314,40 @@
umem->context = context;
umem->length = size;
- umem->offset = addr & ~PAGE_MASK;
+ umem->offset = addr & ~PAGE_MASK;
umem->page_size = PAGE_SIZE;
- umem->start = addr;
/*
- * We ask for writable memory if any access flags other than
- * "remote read" are set. "Local write" and "remote write"
+ * We ask for writable memory if any of the following
+ * access flags are set. "Local write" and "remote write"
* obviously require write access. "Remote atomic" can do
* things like fetch and add, which will modify memory, and
* "MW bind" can change permissions by binding a window.
*/
- umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
+ umem->writable = !!(access &
+ (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
if (invalidation_supported || context->peer_mem_private_data) {
struct ib_peer_memory_client *peer_mem_client;
peer_mem_client = ib_get_peer_client(context, addr, size,
- &umem->peer_mem_client_context,
- &umem->peer_mem_srcu_key);
+ &umem->peer_mem_client_context,
+ &umem->peer_mem_srcu_key);
if (peer_mem_client)
return peer_umem_get(peer_mem_client, umem, addr,
- dmasync, invalidation_supported);
+ dmasync, invalidation_supported);
}
umem->hugetlb = 0;
pmap = vm_map_pmap(&proc->p_vmspace->vm_map);
- if (npages == 0) {
+
+ if (npages == 0 || npages > UINT_MAX) {
ret = -EINVAL;
- goto out;
- }
+ goto out;
+ }
ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
if (ret)
@@ -348,7 +370,7 @@
goto out;
}
sg_set_page(sg, PHYS_TO_VM_PAGE(pa),
- PAGE_SIZE, 0);
+ PAGE_SIZE, 0);
npages--;
start += PAGE_SIZE;
}
@@ -360,17 +382,17 @@
umem->nmap = ib_dma_map_sg_attrs(context->device,
umem->sg_head.sgl,
umem->npages,
- DMA_BIDIRECTIONAL,
- &attrs);
+ DMA_BIDIRECTIONAL,
+ &attrs);
if (umem->nmap != umem->npages) {
- ret = -ENOMEM;
- goto out;
- }
+ ret = -ENOMEM;
+ goto out;
+ }
out:
if (ret < 0) {
if (need_release)
- __ib_umem_release(context->device, umem, 0);
+ __ib_umem_release(context->device, umem, 0);
kfree(umem);
}
@@ -417,12 +439,11 @@
addr = umem->start;
size = umem->length;
last = addr + size;
- start = addr & PAGE_MASK; /* Use the linux PAGE_MASK definition. */
+ start = addr & PAGE_MASK; /* Use the linux PAGE_MASK definition. */
end = roundup2(last, PAGE_SIZE); /* Use PAGE_MASK safe operation. */
vm_map_unwire(&curthread->td_proc->p_vmspace->vm_map, start, end,
- VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+ VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
kfree(umem);
-
}
EXPORT_SYMBOL(ib_umem_release);
Index: sys/ofed/drivers/infiniband/core/user_mad.c
===================================================================
--- sys/ofed/drivers/infiniband/core/user_mad.c
+++ sys/ofed/drivers/infiniband/core/user_mad.c
@@ -33,6 +33,8 @@
* SOFTWARE.
*/
+#define pr_fmt(fmt) "user_mad: " fmt
+
#include <linux/module.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -52,16 +54,22 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_user_mad.h>
-MODULE_AUTHOR("Roland Dreier");
-MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
-MODULE_LICENSE("Dual BSD/GPL");
+/* InfiniBand userspace MAD packet access */
+
+/* Need to change this to use a TUNABLE */
+static int enable_rx_threshold;
+module_param(enable_rx_threshold, int, 0444);
+MODULE_PARM_DESC(enable_rx_threshold, "Enable threshold for receive queue if non-zero (default=0)");
enum {
IB_UMAD_MAX_PORTS = 64,
IB_UMAD_MAX_AGENTS = 32,
IB_UMAD_MAJOR = 231,
- IB_UMAD_MINOR_BASE = 0
+ IB_UMAD_MINOR_BASE = 0,
+
+ IB_UMAD_RX_THRESHOLD = 10000,
+ IB_UMAD_RX_MANAGER_THRESHOLD = 100000
};
/*
@@ -79,10 +87,10 @@
*/
struct ib_umad_port {
- struct cdev *cdev;
+ struct cdev cdev;
struct device *dev;
- struct cdev *sm_cdev;
+ struct cdev sm_cdev;
struct device *sm_dev;
struct semaphore sm_sem;
@@ -93,20 +101,25 @@
struct ib_umad_device *umad_dev;
int dev_num;
u8 port_num;
- struct list_head port_lst;
};
struct ib_umad_device {
int start_port, end_port;
- struct kref ref;
+ struct kobject kobj;
struct ib_umad_port port[0];
};
+struct counted_list {
+ struct list_head list;
+ int count;
+ int threshold;
+};
+
struct ib_umad_file {
struct mutex mutex;
struct ib_umad_port *port;
struct file *filp;
- struct list_head recv_list;
+ struct counted_list recv_list;
struct list_head send_list;
struct list_head port_list;
spinlock_t send_lock;
@@ -131,85 +144,21 @@
static DEFINE_SPINLOCK(port_lock);
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
-static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device);
-static DEFINE_SPINLOCK(ports_list_lock);
-static struct list_head ports_list;
-
-
-static void remove_ports(struct kref *ref)
-{
- int i;
- struct ib_umad_port *p, *p1;
- struct ib_umad_device *dev =
- container_of(ref, struct ib_umad_device, ref);
-
- for (i = 0; i <= dev->end_port - dev->start_port; ++i) {
- struct ib_umad_port *port = &dev->port[i];
-
- list_for_each_entry_safe(p, p1, &ports_list, port_lst)
- if (p == port) {
- list_del(&p->port_lst);
- break;
- }
- }
-}
-
-static void put_umad_dev(struct kref *ref)
+static void ib_umad_release_dev(struct kobject *kobj)
{
- int ret, i;
struct ib_umad_device *dev =
- container_of(ref, struct ib_umad_device, ref);
+ container_of(kobj, struct ib_umad_device, kobj);
- spin_lock(&ports_list_lock);
- ret = (kref_put(ref, remove_ports));
- spin_unlock(&ports_list_lock);
- if (ret) {
- for (i = 0; i <= dev->end_port - dev->start_port; ++i) {
- if (dev->port[i].dev_num < IB_UMAD_MAX_PORTS)
- clear_bit(dev->port[i].dev_num, dev_map);
- else
- clear_bit(dev->port[i].dev_num - IB_UMAD_MAX_PORTS, overflow_map);
- cdev_del(dev->port[i].cdev);
- cdev_del(dev->port[i].sm_cdev);
- }
kfree(dev);
- }
-}
-
-static void release_port(struct ib_umad_port *port)
-{
- put_umad_dev(&port->umad_dev->ref);
-}
-
-
-static struct ib_umad_port *get_port(struct cdev *cdev)
-{
- struct ib_umad_port *port;
-
- spin_lock(&ports_list_lock);
- list_for_each_entry(port, &ports_list, port_lst) {
- if (port->cdev == cdev || port->sm_cdev == cdev) {
- kref_get(&port->umad_dev->ref);
- spin_unlock(&ports_list_lock);
-
- return port;
- }
- }
- spin_unlock(&ports_list_lock);
-
- return NULL;
}
-static void insert_port(struct ib_umad_port *port)
-{
- spin_lock(&ports_list_lock);
- list_add(&port->port_lst, &ports_list);
- spin_unlock(&ports_list_lock);
-}
+static struct kobj_type ib_umad_dev_ktype = {
+ .release = ib_umad_release_dev,
+};
static int hdr_size(struct ib_umad_file *file)
{
@@ -235,7 +184,7 @@
packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
packet->mad.hdr.id++)
if (agent == __get_agent(file, packet->mad.hdr.id)) {
- list_add_tail(&packet->list, &file->recv_list);
+ list_add_tail(&packet->list, &file->recv_list.list);
selwakeup(&file->filp->f_selinfo);
wake_up_interruptible(&file->recv_wait);
ret = 0;
@@ -274,15 +223,56 @@
kfree(packet);
}
+static int get_mads_count(int packet_length, int hdr_len)
+{
+ int seg_len, data_len, mads_count;
+
+ data_len = packet_length - hdr_len;
+ seg_len = sizeof(struct ib_mad) - hdr_len;
+ mads_count = (data_len - 1) / seg_len + 1;
+
+ return mads_count;
+}
+
+static int is_mad_rmpp(struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+
+ rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
+ if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) &&
+ (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+ return 1;
+ }
+ return 0;
+}
+
static void recv_handler(struct ib_mad_agent *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_umad_file *file = agent->context;
struct ib_umad_packet *packet;
+ int mgmt_class;
+ int data_offset;
+ int drop = 0;
+ int mad_is_rmpp;
if (mad_recv_wc->wc->status != IB_WC_SUCCESS)
goto err1;
+ mad_is_rmpp = is_mad_rmpp(mad_recv_wc);
+ if (!agent->rmpp_version && mad_is_rmpp)
+ goto err1;
+
+ mutex_lock(&file->mutex);
+	/* For now we accept all RMPP packets, even if the threshold was crossed */
+ if (enable_rx_threshold &&
+ !mad_is_rmpp && file->recv_list.count >= file->recv_list.threshold)
+ drop = 1;
+ mutex_unlock(&file->mutex);
+
+ if (drop)
+ goto err1;
+
packet = kzalloc(sizeof *packet, GFP_KERNEL);
if (!packet)
goto err1;
@@ -314,6 +304,13 @@
if (queue_packet(file, agent, packet))
goto err2;
+
+ mgmt_class = mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class;
+ data_offset = ib_get_mad_data_offset(mgmt_class);
+ mutex_lock(&file->mutex);
+ file->recv_list.count += get_mads_count(packet->length, data_offset);
+ mutex_unlock(&file->mutex);
+
return;
err2:
@@ -403,20 +400,20 @@
mutex_lock(&file->mutex);
- while (list_empty(&file->recv_list)) {
+ while (list_empty(&file->recv_list.list)) {
mutex_unlock(&file->mutex);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
if (wait_event_interruptible(file->recv_wait,
- !list_empty(&file->recv_list)))
+ !list_empty(&file->recv_list.list)))
return -ERESTARTSYS;
mutex_lock(&file->mutex);
}
- packet = list_entry(file->recv_list.next, struct ib_umad_packet, list);
+ packet = list_entry(file->recv_list.list.next, struct ib_umad_packet, list);
list_del(&packet->list);
mutex_unlock(&file->mutex);
@@ -429,11 +426,21 @@
if (ret < 0) {
/* Requeue packet */
mutex_lock(&file->mutex);
- list_add(&packet->list, &file->recv_list);
+ list_add(&packet->list, &file->recv_list.list);
mutex_unlock(&file->mutex);
} else {
- if (packet->recv_wc)
+ if (packet->recv_wc) {
+ int mgmt_class;
+ int data_offset;
+
+ mgmt_class = packet->recv_wc->recv_buf.mad->mad_hdr.mgmt_class;
+ data_offset = ib_get_mad_data_offset(mgmt_class);
+ mutex_lock(&file->mutex);
+ file->recv_list.count -= get_mads_count(packet->length,
+ data_offset);
+ mutex_unlock(&file->mutex);
ib_free_recv_mad(packet->recv_wc);
+ }
kfree(packet);
}
return ret;
@@ -557,8 +564,8 @@
ah_attr.ah_flags = IB_AH_GRH;
memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
- ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
- ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
+ ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
+ ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
}
@@ -589,9 +596,9 @@
goto err_ah;
}
- packet->msg->ah = ah;
+ packet->msg->ah = ah;
packet->msg->timeout_ms = packet->mad.hdr.timeout_ms;
- packet->msg->retries = packet->mad.hdr.retries;
+ packet->msg->retries = packet->mad.hdr.retries;
packet->msg->context[0] = packet;
/* Copy MAD header. Any RMPP header is already in place. */
@@ -661,12 +668,29 @@
poll_wait(filp, &file->recv_wait, wait);
- if (!list_empty(&file->recv_list))
+ if (!list_empty(&file->recv_list.list))
mask |= POLLIN | POLLRDNORM;
return mask;
}
+static void update_mgmt_threshold(struct ib_umad_file *file, struct ib_mad_reg_req req)
+{
+ int i;
+
+	/* Update managers' class rx threshold */
+ for_each_set_bit(i, req.method_mask, IB_MGMT_MAX_METHODS) {
+ if (i == IB_MGMT_METHOD_GET ||
+ i == IB_MGMT_METHOD_SET ||
+ i == IB_MGMT_METHOD_REPORT ||
+ i == IB_MGMT_METHOD_TRAP) {
+ file->recv_list.threshold =
+ IB_UMAD_RX_MANAGER_THRESHOLD;
+ break;
+ }
+ }
+}
+
static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
int compat_method_mask)
{
@@ -680,6 +704,8 @@
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
+ dev_info(file->port->dev,
+ "ib_umad_reg_agent: invalid device\n");
ret = -EPIPE;
goto out;
}
@@ -690,6 +716,9 @@
}
if (ureq.qpn != 0 && ureq.qpn != 1) {
+ dev_info(file->port->dev,
+ "ib_umad_reg_agent: invalid QPN %d specified\n",
+ ureq.qpn);
ret = -EINVAL;
goto out;
}
@@ -698,11 +727,15 @@
if (!__get_agent(file, agent_id))
goto found;
+ dev_info(file->port->dev,
+ "ib_umad_reg_agent: Max Agents (%u) reached\n",
+ IB_UMAD_MAX_AGENTS);
ret = -ENOMEM;
goto out;
found:
if (ureq.mgmt_class) {
+ memset(&req, 0, sizeof(req));
req.mgmt_class = ureq.mgmt_class;
req.mgmt_class_version = ureq.mgmt_class_version;
memcpy(req.oui, ureq.oui, sizeof req.oui);
@@ -717,6 +750,8 @@
} else
memcpy(req.method_mask, ureq.method_mask,
sizeof req.method_mask);
+
+ update_mgmt_threshold(file, req);
}
agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
@@ -739,10 +774,11 @@
if (!file->already_used) {
file->already_used = 1;
if (!file->use_pkey_index) {
- printk(KERN_WARNING "user_mad: process %s did not enable "
- "P_Key index support.\n", curthread->td_proc->p_comm);
- printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt "
- "has info on the new ABI.\n");
+ dev_warn(file->port->dev,
+ "process %s did not enable P_Key index support.\n",
+ curthread->td_proc->p_comm);
+ dev_warn(file->port->dev,
+ " Documentation/infiniband/user_mad.txt has info on the new ABI.\n");
}
}
@@ -805,6 +841,33 @@
return ret;
}
+static long ib_umad_update_threshold(struct ib_umad_file *file, void __user
+ *arg)
+{
+ struct ib_user_mad_thresh_req ureq;
+ int ret = 0;
+
+ mutex_lock(&file->port->file_mutex);
+ mutex_lock(&file->mutex);
+
+ if (!file->port->ib_dev) {
+ ret = -EPIPE;
+ goto out;
+ }
+
+ if (copy_from_user(&ureq, arg, sizeof(ureq))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ file->recv_list.threshold = ureq.threshold;
+out:
+ mutex_unlock(&file->mutex);
+ mutex_unlock(&file->port->file_mutex);
+
+ return ret;
+}
+
static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
@@ -815,6 +878,8 @@
return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg);
case IB_USER_MAD_ENABLE_PKEY:
return ib_umad_enable_pkey(filp->private_data);
+ case IB_USER_MAD_UPDATE_THRESHOLD:
+ return ib_umad_update_threshold(filp->private_data, (void __user *)arg);
default:
return -ENOIOCTLCMD;
}
@@ -831,12 +896,21 @@
return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg));
case IB_USER_MAD_ENABLE_PKEY:
return ib_umad_enable_pkey(filp->private_data);
+ case IB_USER_MAD_UPDATE_THRESHOLD:
+ return ib_umad_update_threshold(filp->private_data, compat_ptr(arg));
default:
return -ENOIOCTLCMD;
}
}
#endif
+static void init_recv_list(struct counted_list *recv_list)
+{
+ INIT_LIST_HEAD(&recv_list->list);
+ recv_list->count = 0;
+ recv_list->threshold = IB_UMAD_RX_THRESHOLD;
+}
+
/*
* ib_umad_open() does not need the BKL:
*
@@ -850,30 +924,23 @@
{
struct ib_umad_port *port;
struct ib_umad_file *file;
- int ret;
+ int ret = -ENXIO;
- port = get_port(inode->i_cdev->si_drv1);
- if (!port)
- return -ENXIO;
+ port = container_of(inode->i_cdev->si_drv1, struct ib_umad_port, cdev);
mutex_lock(&port->file_mutex);
- if (!port->ib_dev) {
- release_port(port);
- ret = -ENXIO;
+ if (!port->ib_dev)
goto out;
- }
+ ret = -ENOMEM;
file = kzalloc(sizeof *file, GFP_KERNEL);
- if (!file) {
- release_port(port);
- ret = -ENOMEM;
+ if (!file)
goto out;
- }
mutex_init(&file->mutex);
spin_lock_init(&file->send_lock);
- INIT_LIST_HEAD(&file->recv_list);
+ init_recv_list(&file->recv_list);
INIT_LIST_HEAD(&file->send_list);
init_waitqueue_head(&file->recv_wait);
@@ -884,6 +951,13 @@
list_add_tail(&file->port_list, &port->file_list);
ret = nonseekable_open(inode, filp);
+ if (ret) {
+ list_del(&file->port_list);
+ kfree(file);
+ goto out;
+ }
+
+ kobject_get(&port->umad_dev->kobj);
out:
mutex_unlock(&port->file_mutex);
@@ -893,7 +967,7 @@
static int ib_umad_close(struct inode *inode, struct file *filp)
{
struct ib_umad_file *file = filp->private_data;
- struct ib_umad_port *port = file->port;
+ struct ib_umad_device *dev = file->port->umad_dev;
struct ib_umad_packet *packet, *tmp;
int already_dead;
int i;
@@ -904,7 +978,7 @@
already_dead = file->agents_dead;
file->agents_dead = 1;
- list_for_each_entry_safe(packet, tmp, &file->recv_list, list) {
+ list_for_each_entry_safe(packet, tmp, &file->recv_list.list, list) {
if (packet->recv_wc)
ib_free_recv_mad(packet->recv_wc);
kfree(packet);
@@ -922,21 +996,21 @@
mutex_unlock(&file->port->file_mutex);
kfree(file);
- release_port(port);
+ kobject_put(&dev->kobj);
return 0;
}
static const struct file_operations umad_fops = {
- .owner = THIS_MODULE,
- .read = ib_umad_read,
- .write = ib_umad_write,
- .poll = ib_umad_poll,
+ .owner = THIS_MODULE,
+ .read = ib_umad_read,
+ .write = ib_umad_write,
+ .poll = ib_umad_poll,
.unlocked_ioctl = ib_umad_ioctl,
#ifdef CONFIG_COMPAT
- .compat_ioctl = ib_umad_compat_ioctl,
+ .compat_ioctl = ib_umad_compat_ioctl,
#endif
- .open = ib_umad_open,
+ .open = ib_umad_open,
.release = ib_umad_close,
.llseek = no_llseek,
};
@@ -949,9 +1023,7 @@
};
int ret;
- port = get_port(inode->i_cdev->si_drv1);
- if (!port)
- return -ENXIO;
+ port = container_of(inode->i_cdev->si_drv1, struct ib_umad_port, sm_cdev);
if (filp->f_flags & O_NONBLOCK) {
if (down_trylock(&port->sm_sem)) {
@@ -966,17 +1038,27 @@
}
ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
- if (ret) {
- up(&port->sm_sem);
- goto fail;
- }
+ if (ret)
+ goto err_up_sem;
filp->private_data = port;
- return nonseekable_open(inode, filp);
+ ret = nonseekable_open(inode, filp);
+ if (ret)
+ goto err_clr_sm_cap;
+
+ kobject_get(&port->umad_dev->kobj);
+
+ return 0;
+
+err_clr_sm_cap:
+ swap(props.set_port_cap_mask, props.clr_port_cap_mask);
+ ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+
+err_up_sem:
+ up(&port->sm_sem);
fail:
- release_port(port);
return ret;
}
@@ -995,14 +1077,14 @@
up(&port->sm_sem);
- release_port(port);
+ kobject_put(&port->umad_dev->kobj);
return ret;
}
static const struct file_operations umad_sm_fops = {
- .owner = THIS_MODULE,
- .open = ib_umad_sm_open,
+ .owner = THIS_MODULE,
+ .open = ib_umad_sm_open,
.release = ib_umad_sm_close,
.llseek = no_llseek,
};
@@ -1039,12 +1121,13 @@
static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
+ return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
}
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
static dev_t overflow_maj;
-static int find_overflow_devnum(void)
+static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
+static int find_overflow_devnum(struct ib_device *device)
{
int ret;
@@ -1052,7 +1135,8 @@
ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
- printk(KERN_ERR "user_mad: couldn't register dynamic device number\n");
+ dev_err(&device->dev,
+ "couldn't register dynamic device number\n");
return ret;
}
}
@@ -1065,6 +1149,7 @@
}
static int ib_umad_init_port(struct ib_device *device, int port_num,
+ struct ib_umad_device *umad_dev,
struct ib_umad_port *port)
{
int devnum;
@@ -1074,9 +1159,9 @@
devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
if (devnum >= IB_UMAD_MAX_PORTS) {
spin_unlock(&port_lock);
- devnum = find_overflow_devnum();
+ devnum = find_overflow_devnum(device);
if (devnum < 0)
- return -1;
+ return -1;
spin_lock(&port_lock);
port->dev_num = devnum + IB_UMAD_MAX_PORTS;
@@ -1095,18 +1180,15 @@
mutex_init(&port->file_mutex);
INIT_LIST_HEAD(&port->file_list);
- port->cdev = cdev_alloc();
- if (!port->cdev)
- goto err_cdev_c;
-
- port->cdev->ops = &umad_fops;
- port->cdev->owner = THIS_MODULE;
- kobject_set_name(&port->cdev->kobj, "umad%d", port->dev_num);
- if (cdev_add(port->cdev, base, 1))
+ cdev_init(&port->cdev, &umad_fops);
+ port->cdev.owner = THIS_MODULE;
+ port->cdev.kobj.parent = &umad_dev->kobj;
+ kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
+ if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
port->dev = device_create(umad_class, device->dma_device,
- port->cdev->dev, port,
+ port->cdev.dev, port,
"umad%d", port->dev_num);
if (IS_ERR(port->dev))
goto err_cdev;
@@ -1117,18 +1199,15 @@
goto err_dev;
base += IB_UMAD_MAX_PORTS;
- port->sm_cdev = cdev_alloc();
- if (!port->sm_cdev)
- goto err_dev;
-
- port->sm_cdev->ops = &umad_sm_fops;
- port->sm_cdev->owner = THIS_MODULE;
- kobject_set_name(&port->sm_cdev->kobj, "issm%d", port->dev_num);
- if (cdev_add(port->sm_cdev, base, 1))
+ cdev_init(&port->sm_cdev, &umad_sm_fops);
+ port->sm_cdev.owner = THIS_MODULE;
+ port->sm_cdev.kobj.parent = &umad_dev->kobj;
+ kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
+ if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
port->sm_dev = device_create(umad_class, device->dma_device,
- port->sm_cdev->dev, port,
+ port->sm_cdev.dev, port,
"issm%d", port->dev_num);
if (IS_ERR(port->sm_dev))
goto err_sm_cdev;
@@ -1141,17 +1220,16 @@
return 0;
err_sm_dev:
- device_destroy(umad_class, port->sm_cdev->dev);
+ device_destroy(umad_class, port->sm_cdev.dev);
err_sm_cdev:
- cdev_del(port->sm_cdev);
+ cdev_del(&port->sm_cdev);
err_dev:
- device_destroy(umad_class, port->cdev->dev);
+ device_destroy(umad_class, port->cdev.dev);
err_cdev:
- cdev_del(port->cdev);
-err_cdev_c:
+ cdev_del(&port->cdev);
if (port->dev_num < IB_UMAD_MAX_PORTS)
clear_bit(devnum, dev_map);
else
@@ -1168,8 +1246,11 @@
dev_set_drvdata(port->dev, NULL);
dev_set_drvdata(port->sm_dev, NULL);
- device_destroy(umad_class, port->cdev->dev);
- device_destroy(umad_class, port->sm_cdev->dev);
+ device_destroy(umad_class, port->cdev.dev);
+ device_destroy(umad_class, port->sm_cdev.dev);
+
+ cdev_del(&port->cdev);
+ cdev_del(&port->sm_cdev);
mutex_lock(&port->file_mutex);
@@ -1186,6 +1267,11 @@
}
mutex_unlock(&port->file_mutex);
+
+ if (port->dev_num < IB_UMAD_MAX_PORTS)
+ clear_bit(port->dev_num, dev_map);
+ else
+ clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map);
}
static void ib_umad_add_one(struct ib_device *device)
@@ -1209,19 +1295,17 @@
if (!umad_dev)
return;
- kref_init(&umad_dev->ref);
+ kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype);
umad_dev->start_port = s;
umad_dev->end_port = e;
- for (i = 0; i <= e - s; ++i)
- insert_port(&umad_dev->port[i]);
-
for (i = s; i <= e; ++i) {
umad_dev->port[i - s].umad_dev = umad_dev;
- if (ib_umad_init_port(device, i, &umad_dev->port[i - s]))
- goto err;
+ if (ib_umad_init_port(device, i, umad_dev,
+ &umad_dev->port[i - s]))
+ goto err;
}
ib_set_client_data(device, &umad_client, umad_dev);
@@ -1230,9 +1314,9 @@
err:
while (--i >= s)
- ib_umad_kill_port(&umad_dev->port[i - s]);
+ ib_umad_kill_port(&umad_dev->port[i - s]);
- put_umad_dev(&umad_dev->ref);
+ kobject_put(&umad_dev->kobj);
}
static void ib_umad_remove_one(struct ib_device *device)
@@ -1244,9 +1328,9 @@
return;
for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
- ib_umad_kill_port(&umad_dev->port[i]);
+ ib_umad_kill_port(&umad_dev->port[i]);
- put_umad_dev(&umad_dev->ref);
+ kobject_put(&umad_dev->kobj);
}
static char *umad_devnode(struct device *dev, umode_t *mode)
@@ -1258,19 +1342,17 @@
{
int ret;
- INIT_LIST_HEAD(&ports_list);
-
ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
- printk(KERN_ERR "user_mad: couldn't register device number\n");
+ pr_err("couldn't register device number\n");
goto out;
}
umad_class = class_create(THIS_MODULE, "infiniband_mad");
if (IS_ERR(umad_class)) {
ret = PTR_ERR(umad_class);
- printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
+ pr_err("couldn't create class infiniband_mad\n");
goto out_chrdev;
}
@@ -1278,13 +1360,13 @@
ret = class_create_file(umad_class, &class_attr_abi_version);
if (ret) {
- printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
+ pr_err("couldn't create abi_version attribute\n");
goto out_class;
}
ret = ib_register_client(&umad_client);
if (ret) {
- printk(KERN_ERR "user_mad: couldn't register ib_umad client\n");
+ pr_err("couldn't register ib_umad client\n");
goto out_class;
}
@@ -1309,5 +1391,5 @@
unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2);
}
-module_init(ib_umad_init);
+module_init_order(ib_umad_init, SI_ORDER_THIRD);
module_exit(ib_umad_cleanup);
Index: sys/ofed/drivers/infiniband/core/uverbs.h
===================================================================
--- sys/ofed/drivers/infiniband/core/uverbs.h
+++ sys/ofed/drivers/infiniband/core/uverbs.h
@@ -43,6 +43,7 @@
#include <linux/completion.h>
#include <linux/cdev.h>
#include <linux/rbtree.h>
+#include <linux/srcu.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_verbs_exp.h>
@@ -50,6 +51,18 @@
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_user_verbs_exp.h>
+#define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen) \
+ do { \
+ (udata)->inbuf = (ilen) ? (const void __user *) (ibuf) : NULL; \
+ (udata)->outbuf = (olen) ? (void __user *) (obuf) : NULL; \
+ (udata)->inlen = (ilen); \
+ (udata)->outlen = (olen); \
+ } while (0)
+
+enum uverbs_flags {
+ UVERBS_FLAG_DISASSOCIATE = 1
+};
+
/*
* Our lifetime rules for these structs are the following:
*
@@ -81,11 +94,17 @@
struct cdev cdev;
struct rb_root xrcd_tree;
struct mutex xrcd_tree_mutex;
+ struct mutex disassociate_mutex; /* protect lists of files. */
+ int disassociated;
+ u32 flags;
+ struct srcu_struct disassociate_srcu;
+ struct list_head uverbs_file_list;
+ struct list_head uverbs_events_file_list;
};
struct ib_uverbs_event_file {
struct kref ref;
- struct file *filp;
+ struct file *filp;
int is_async;
struct ib_uverbs_file *uverbs_file;
spinlock_t lock;
@@ -93,6 +112,7 @@
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
+ struct list_head list;
};
struct ib_uverbs_file {
@@ -102,6 +122,7 @@
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
struct ib_uverbs_event_file *async_file;
+ struct list_head list;
};
struct ib_uverbs_event {
@@ -152,7 +173,7 @@
};
struct ib_udct_object {
- struct ib_uobject uobject;
+ struct ib_uevent_object uevent;
};
extern spinlock_t ib_uverbs_idr_lock;
@@ -166,6 +187,8 @@
extern struct idr ib_uverbs_xrcd_idr;
extern struct idr ib_uverbs_rule_idr;
extern struct idr ib_uverbs_dct_idr;
+extern struct idr ib_uverbs_wq_idr;
+extern struct idr ib_uverbs_rwq_ind_tbl_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
@@ -186,6 +209,7 @@
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+void ib_uverbs_dct_event_handler(struct ib_event *event, void *context_ptr);
struct ib_uverbs_flow_spec {
union {
@@ -215,6 +239,7 @@
IB_UVERBS_DECLARE_CMD(alloc_pd);
IB_UVERBS_DECLARE_CMD(dealloc_pd);
IB_UVERBS_DECLARE_CMD(reg_mr);
+IB_UVERBS_DECLARE_CMD(rereg_mr);
IB_UVERBS_DECLARE_CMD(dereg_mr);
IB_UVERBS_DECLARE_CMD(alloc_mw);
IB_UVERBS_DECLARE_CMD(dealloc_mw);
@@ -245,17 +270,18 @@
IB_UVERBS_DECLARE_CMD(close_xrcd);
#define IB_UVERBS_DECLARE_EX_CMD(name) \
- int ib_uverbs_ex_##name(struct ib_uverbs_file *file,\
- struct ib_udata *ucore, \
+ int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
+ struct ib_udata *ucore, \
struct ib_udata *uhw)
-#define IB_UVERBS_DECLARE_EXP_CMD(name) \
- ssize_t ib_uverbs_exp_##name(struct ib_uverbs_file *file, \
- struct ib_udata *ucore, \
- struct ib_udata *uhw)
-
IB_UVERBS_DECLARE_EX_CMD(create_flow);
IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
+IB_UVERBS_DECLARE_EX_CMD(query_device);
+
+#define IB_UVERBS_DECLARE_EXP_CMD(name) \
+ int ib_uverbs_exp_##name(struct ib_uverbs_file *file, \
+ struct ib_udata *ucore, \
+ struct ib_udata *uhw)
IB_UVERBS_DECLARE_EXP_CMD(create_qp);
IB_UVERBS_DECLARE_EXP_CMD(modify_cq);
@@ -265,5 +291,17 @@
IB_UVERBS_DECLARE_EXP_CMD(create_dct);
IB_UVERBS_DECLARE_EXP_CMD(destroy_dct);
IB_UVERBS_DECLARE_EXP_CMD(query_dct);
+IB_UVERBS_DECLARE_EXP_CMD(arm_dct);
+IB_UVERBS_DECLARE_EXP_CMD(create_mr);
+IB_UVERBS_DECLARE_EXP_CMD(query_mkey);
+IB_UVERBS_DECLARE_EXP_CMD(reg_mr_ex);
+IB_UVERBS_DECLARE_EXP_CMD(prefetch_mr);
+IB_UVERBS_DECLARE_EXP_CMD(rereg_mr);
+IB_UVERBS_DECLARE_EXP_CMD(create_wq);
+IB_UVERBS_DECLARE_EXP_CMD(modify_wq);
+IB_UVERBS_DECLARE_EXP_CMD(destroy_wq);
+IB_UVERBS_DECLARE_EXP_CMD(create_rwq_ind_table);
+IB_UVERBS_DECLARE_EXP_CMD(destroy_rwq_ind_table);
+IB_UVERBS_DECLARE_EXP_CMD(create_flow);
#endif /* UVERBS_H */
Index: sys/ofed/drivers/infiniband/core/uverbs_cmd.c
===================================================================
--- sys/ofed/drivers/infiniband/core/uverbs_cmd.c
+++ sys/ofed/drivers/infiniband/core/uverbs_cmd.c
@@ -39,21 +39,24 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/moduleparam.h>
+#include <linux/printk.h>
#include <linux/rbtree.h>
#include <linux/lockdep.h>
#include <rdma/ib_addr.h>
+#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/fcntl.h>
#include <sys/priv.h>
#include "uverbs.h"
+#include "core_priv.h"
static int disable_raw_qp_enforcement;
module_param_named(disable_raw_qp_enforcement, disable_raw_qp_enforcement, int,
- 0444);
-MODULE_PARM_DESC(disable_raw_qp_enforcement, "Disable RAW QP enforcement for "
- "being opened by root (default: 0)");
+ 0444);
+MODULE_PARM_DESC(disable_raw_qp_enforcement, "Disable RAW QP enforcement for "
+ "being opened by root (default: 0)");
struct uverbs_lock_class {
struct lock_class_key key;
@@ -69,6 +72,9 @@
static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
static struct uverbs_lock_class dct_lock_class = { .name = "DCT-uobj" };
+static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
+static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
+static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
static int uverbs_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
{
@@ -92,11 +98,13 @@
(udata)->outbuf = (void __user *) (obuf); \
(udata)->inlen = (ilen); \
(udata)->outlen = (olen); \
+ (udata)->src = IB_UDATA_LEGACY_CMD; \
} while (0)
enum uverbs_cmd_type {
IB_USER_VERBS_CMD_BASIC,
- IB_USER_VERBS_CMD_EXTENDED
+ IB_USER_VERBS_CMD_EXTENDED,
+ IB_USER_VERBS_CMD_EXP
};
/*
@@ -283,6 +291,27 @@
return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
}
+static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context)
+{
+ return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0);
+}
+
+static void put_wq_read(struct ib_wq *wq)
+{
+ put_uobj_read(wq->uobject);
+}
+
+static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle,
+ struct ib_ucontext *context)
+{
+ return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0);
+}
+
+static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table)
+{
+ put_uobj_read(ind_table->uobject);
+}
+
static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
{
struct ib_uobject *uobj;
@@ -333,6 +362,16 @@
put_uobj_read(uobj);
}
+static struct ib_mr *idr_read_mr(int mr_handle, struct ib_ucontext *context)
+{
+ return idr_read_obj(&ib_uverbs_mr_idr, mr_handle, context, 0);
+}
+
+static void put_mr_read(struct ib_mr *mr)
+{
+ put_uobj_read(mr->uobject);
+}
+
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
@@ -376,19 +415,19 @@
INIT_LIST_HEAD(&ucontext->qp_list);
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
+ INIT_LIST_HEAD(&ucontext->wq_list);
+ INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list);
INIT_LIST_HEAD(&ucontext->xrcd_list);
INIT_LIST_HEAD(&ucontext->rule_list);
INIT_LIST_HEAD(&ucontext->dct_list);
ucontext->closing = 0;
- ucontext->peer_mem_private_data = NULL;
- ucontext->peer_mem_name = NULL;
resp.num_comp_vectors = file->device->num_comp_vectors;
- ret = get_unused_fd();
+ ret = get_unused_fd_flags(O_CLOEXEC);
if (ret < 0)
goto err_free;
- resp.async_fd = ret;
+ resp.async_fd = ret;
filp = ib_uverbs_alloc_event_file(file, 1);
if (IS_ERR(filp)) {
@@ -396,6 +435,12 @@
goto err_fd;
}
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_file;
+ }
+
file->async_file = filp->private_data;
INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev,
@@ -404,11 +449,6 @@
if (ret)
goto err_file;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_file;
- }
kref_get(&file->async_file->ref);
kref_get(&file->ref);
file->ucontext = ucontext;
@@ -433,53 +473,50 @@
return ret;
}
-static void ib_uverbs_query_device_assign(
- struct ib_uverbs_query_device_resp *resp,
- struct ib_device_attr *attr,
- struct ib_uverbs_file *file)
+static void copy_query_dev_fields(struct ib_uverbs_file *file,
+ struct ib_uverbs_query_device_resp *resp,
+ struct ib_device_attr *attr)
{
- memset(resp, 0, sizeof(*resp));
-
- resp->fw_ver = attr->fw_ver;
- resp->node_guid = file->device->ib_dev->node_guid;
- resp->sys_image_guid = attr->sys_image_guid;
- resp->max_mr_size = attr->max_mr_size;
- resp->page_size_cap = attr->page_size_cap;
- resp->vendor_id = attr->vendor_id;
- resp->vendor_part_id = attr->vendor_part_id;
- resp->hw_ver = attr->hw_ver;
- resp->max_qp = attr->max_qp;
- resp->max_qp_wr = attr->max_qp_wr;
- resp->device_cap_flags = attr->device_cap_flags;
- resp->max_sge = attr->max_sge;
- resp->max_sge_rd = attr->max_sge_rd;
- resp->max_cq = attr->max_cq;
- resp->max_cqe = attr->max_cqe;
- resp->max_mr = attr->max_mr;
- resp->max_pd = attr->max_pd;
- resp->max_qp_rd_atom = attr->max_qp_rd_atom;
- resp->max_ee_rd_atom = attr->max_ee_rd_atom;
- resp->max_res_rd_atom = attr->max_res_rd_atom;
- resp->max_qp_init_rd_atom = attr->max_qp_init_rd_atom;
- resp->max_ee_init_rd_atom = attr->max_ee_init_rd_atom;
- resp->atomic_cap = attr->atomic_cap;
- resp->max_ee = attr->max_ee;
- resp->max_rdd = attr->max_rdd;
- resp->max_mw = attr->max_mw;
- resp->max_raw_ipv6_qp = attr->max_raw_ipv6_qp;
- resp->max_raw_ethy_qp = attr->max_raw_ethy_qp;
- resp->max_mcast_grp = attr->max_mcast_grp;
- resp->max_mcast_qp_attach = attr->max_mcast_qp_attach;
- resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
- resp->max_ah = attr->max_ah;
- resp->max_fmr = attr->max_fmr;
- resp->max_map_per_fmr = attr->max_map_per_fmr;
- resp->max_srq = attr->max_srq;
- resp->max_srq_wr = attr->max_srq_wr;
- resp->max_srq_sge = attr->max_srq_sge;
- resp->max_pkeys = attr->max_pkeys;
- resp->local_ca_ack_delay = attr->local_ca_ack_delay;
- resp->phys_port_cnt = file->device->ib_dev->phys_port_cnt;
+ resp->fw_ver = attr->fw_ver;
+ resp->node_guid = file->device->ib_dev->node_guid;
+ resp->sys_image_guid = attr->sys_image_guid;
+ resp->max_mr_size = attr->max_mr_size;
+ resp->page_size_cap = attr->page_size_cap;
+ resp->vendor_id = attr->vendor_id;
+ resp->vendor_part_id = attr->vendor_part_id;
+ resp->hw_ver = attr->hw_ver;
+ resp->max_qp = attr->max_qp;
+ resp->max_qp_wr = attr->max_qp_wr;
+ resp->device_cap_flags = attr->device_cap_flags;
+ resp->max_sge = attr->max_sge;
+ resp->max_sge_rd = attr->max_sge_rd;
+ resp->max_cq = attr->max_cq;
+ resp->max_cqe = attr->max_cqe;
+ resp->max_mr = attr->max_mr;
+ resp->max_pd = attr->max_pd;
+ resp->max_qp_rd_atom = attr->max_qp_rd_atom;
+ resp->max_ee_rd_atom = attr->max_ee_rd_atom;
+ resp->max_res_rd_atom = attr->max_res_rd_atom;
+ resp->max_qp_init_rd_atom = attr->max_qp_init_rd_atom;
+ resp->max_ee_init_rd_atom = attr->max_ee_init_rd_atom;
+ resp->atomic_cap = attr->atomic_cap;
+ resp->max_ee = attr->max_ee;
+ resp->max_rdd = attr->max_rdd;
+ resp->max_mw = attr->max_mw;
+ resp->max_raw_ipv6_qp = attr->max_raw_ipv6_qp;
+ resp->max_raw_ethy_qp = attr->max_raw_ethy_qp;
+ resp->max_mcast_grp = attr->max_mcast_grp;
+ resp->max_mcast_qp_attach = attr->max_mcast_qp_attach;
+ resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
+ resp->max_ah = attr->max_ah;
+ resp->max_fmr = attr->max_fmr;
+ resp->max_map_per_fmr = attr->max_map_per_fmr;
+ resp->max_srq = attr->max_srq;
+ resp->max_srq_wr = attr->max_srq_wr;
+ resp->max_srq_sge = attr->max_srq_sge;
+ resp->max_pkeys = attr->max_pkeys;
+ resp->local_ca_ack_delay = attr->local_ca_ack_delay;
+ resp->phys_port_cnt = file->device->ib_dev->phys_port_cnt;
}
ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
@@ -501,10 +538,18 @@
if (ret)
return ret;
- ib_uverbs_query_device_assign(&resp, &attr, file);
+ memset(&resp, 0, sizeof resp);
- if (copy_to_user((void __user *)(unsigned long) cmd.response,
- &resp, sizeof(resp)))
+ copy_query_dev_fields(file, &resp, &attr);
+
+ if (resp.atomic_cap > IB_ATOMIC_GLOB)
+ resp.atomic_cap = IB_ATOMIC_NONE;
+
+ resp.device_cap_flags &= ~IB_EXP_DEVICE_MASK;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
return -EFAULT;
return in_len;
@@ -755,12 +800,12 @@
}
ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_open_xrcd cmd;
struct ib_uverbs_open_xrcd_resp resp;
- struct ib_udata udata;
+ struct ib_udata udata;
struct ib_uxrcd_object *obj;
struct ib_xrcd *xrcd = NULL;
struct fd f = {NULL};
@@ -776,7 +821,7 @@
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ in_len - sizeof cmd, out_len - sizeof resp);
mutex_lock(&file->device->xrcd_tree_mutex);
@@ -797,7 +842,7 @@
}
if (xrcd && cmd.oflags & O_EXCL) {
- ret = -EINVAL;
+ ret = -EINVAL;
goto err_tree_mutex_unlock;
}
}
@@ -892,11 +937,11 @@
}
ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_close_xrcd cmd;
- struct ib_uobject *uobj;
+ struct ib_uobject *uobj;
struct ib_xrcd *xrcd = NULL;
struct inode *inode = NULL;
struct ib_uxrcd_object *obj;
@@ -924,8 +969,8 @@
if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
ret = ib_dealloc_xrcd(uobj->object);
- if (!ret)
- uobj->live = 0;
+ if (!ret)
+ uobj->live = 0;
}
live = uobj->live;
@@ -968,17 +1013,28 @@
xrcd_table_delete(dev, inode);
}
+#define KEEP_ACCESS_FLAGS (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | \
+ IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC | \
+ IB_ACCESS_MW_BIND)
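+/*
+ * Keep only the access bits the core verbs layer understands and map the
+ * experimental zero-based MW flag onto IB_ZERO_BASED.
+ */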
+static int translate_exp_access_flags(u64 exp_access_flags)
+{
+ int access_flags = exp_access_flags & KEEP_ACCESS_FLAGS;
+ if (exp_access_flags & IB_UVERBS_EXP_ACCESS_MW_ZERO_BASED)
+ access_flags |= IB_ZERO_BASED;
+ return access_flags;
+}
+
ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_reg_mr cmd;
struct ib_uverbs_reg_mr_resp resp;
- struct ib_udata udata;
+ struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
- int ret;
+ int ret;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -1006,9 +1062,10 @@
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
if (!pd) {
- ret = -EINVAL;
+ ret = -EINVAL;
goto err_free;
}
+
/* We first get a new "obj id" to be passed later to reg mr for
further use as mr_id.
*/
@@ -1020,7 +1077,7 @@
cmd.access_flags, &udata, uobj->id);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
- goto err_remove_uobj;
+ goto err_put;
}
mr->device = pd->device;
@@ -1055,11 +1112,10 @@
return in_len;
err_copy:
- ib_dereg_mr(mr);
-
-err_remove_uobj:
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
+ ib_dereg_mr(mr);
+
err_put:
put_pd_read(pd);
@@ -1068,14 +1124,195 @@
return ret;
}
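+/*
+ * Experimental MR re-registration: change the translation, access flags
+ * and/or PD of an existing MR in place via the driver's exp_rereg_user_mr
+ * hook.
+ */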
+int ib_uverbs_exp_rereg_mr(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_exp_rereg_mr cmd;
+ struct ib_uverbs_exp_rereg_mr_resp resp;
+ struct ib_pd *pd = NULL;
+ struct ib_mr *mr;
+ struct ib_pd *old_pd;
+ int ret;
+ struct ib_uobject *uobj;
+
+ if (ucore->outlen < sizeof(resp))
+ return -ENOSPC;
+
+ ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ if (cmd.flags & ~IB_EXP_MR_REREG_SUPPORTED || !cmd.flags)
+ return -EINVAL;
+
+ if ((cmd.flags & IB_EXP_MR_REREG_TRANS) &&
+ (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
+ (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
+ return -EINVAL;
+
+ uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
+ file->ucontext);
+
+ if (!uobj)
+ return -EINVAL;
+
+ mr = uobj->object;
+
+ if (cmd.flags & IB_EXP_MR_REREG_ACCESS) {
+ ret = ib_check_mr_access(cmd.access_flags);
+ if (ret)
+ goto put_uobjs;
+ }
+
+ if (cmd.flags & IB_EXP_MR_REREG_PD) {
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto put_uobjs;
+ }
+ }
+
+ if (atomic_read(&mr->usecnt)) {
+ ret = -EBUSY;
+ goto put_uobj_pd;
+ }
+
+ old_pd = mr->pd;
+ ret = mr->device->exp_rereg_user_mr(mr, cmd.flags, cmd.start,
+ cmd.length, cmd.hca_va,
+ cmd.access_flags, pd);
+ if (!ret) {
+ if (cmd.flags & IB_EXP_MR_REREG_PD) {
+ atomic_inc(&pd->usecnt);
+ mr->pd = pd;
+ atomic_dec(&old_pd->usecnt);
+ }
+ } else {
+ goto put_uobj_pd;
+ }
+
+ memset(&resp, 0, sizeof(resp));
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+
+ ret = ib_copy_to_udata(ucore,
+ &resp, sizeof(resp));
+put_uobj_pd:
+ if (cmd.flags & IB_EXP_MR_REREG_PD)
+ put_pd_read(pd);
+
+put_uobjs:
+
+ put_uobj_write(mr->uobject);
+
+ return ret;
+}
+
+ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_rereg_mr cmd;
+ struct ib_uverbs_rereg_mr_resp resp;
+ struct ib_udata udata;
+ struct ib_pd *pd = NULL;
+ struct ib_mr *mr;
+ struct ib_pd *old_pd;
+ int ret;
+ struct ib_uobject *uobj;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd), out_len - sizeof(resp));
+
+ if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
+ return -EINVAL;
+
+ if ((cmd.flags & IB_MR_REREG_TRANS) &&
+ (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
+ (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
+ return -EINVAL;
+
+ uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
+ file->ucontext);
+
+ if (!uobj)
+ return -EINVAL;
+
+ mr = uobj->object;
+
+ if (cmd.flags & IB_MR_REREG_ACCESS) {
+ ret = ib_check_mr_access(cmd.access_flags);
+ if (ret)
+ goto put_uobjs;
+ }
+
+ if (cmd.flags & IB_MR_REREG_PD) {
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto put_uobjs;
+ }
+ }
+
+ if (atomic_read(&mr->usecnt)) {
+ ret = -EBUSY;
+ goto put_uobj_pd;
+ }
+
+ old_pd = mr->pd;
+ ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
+ cmd.length, cmd.hca_va,
+ cmd.access_flags, pd, &udata);
+ if (!ret) {
+ if (cmd.flags & IB_MR_REREG_PD) {
+ atomic_inc(&pd->usecnt);
+ mr->pd = pd;
+ atomic_dec(&old_pd->usecnt);
+ }
+ } else {
+ goto put_uobj_pd;
+ }
+
+ memset(&resp, 0, sizeof(resp));
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ ret = -EFAULT;
+ else
+ ret = in_len;
+
+put_uobj_pd:
+ if (cmd.flags & IB_MR_REREG_PD)
+ put_pd_read(pd);
+
+put_uobjs:
+
+ put_uobj_write(mr->uobject);
+
+ return ret;
+}
+
ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_dereg_mr cmd;
struct ib_mr *mr;
struct ib_uobject *uobj;
- int ret = -EINVAL;
+ int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -1107,8 +1344,8 @@
}
ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_alloc_mw cmd;
struct ib_uverbs_alloc_mw_resp resp;
@@ -1189,13 +1426,13 @@
}
ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_dealloc_mw cmd;
struct ib_mw *mw;
- struct ib_uobject *uobj;
- int ret = -EINVAL;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
@@ -1226,67 +1463,195 @@
return in_len;
}
-ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+int ib_uverbs_exp_reg_mr_ex(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
{
- struct ib_uverbs_create_comp_channel cmd;
- struct ib_uverbs_create_comp_channel_resp resp;
- struct file *filp;
- int ret;
-
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ struct ib_uverbs_exp_reg_mr_ex cmd;
+ struct ib_uverbs_exp_reg_mr_resp_ex resp;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ int access_flags;
+ int ret;
+ const int min_cmd_size = offsetof(typeof(cmd), comp_mask) +
+ sizeof(cmd.comp_mask);
+
+ if (ucore->inlen < min_cmd_size) {
+ pr_debug("ib_uverbs_reg_mr: command input length too short\n");
+ return -EINVAL;
+ }
- ret = get_unused_fd();
- if (ret < 0)
+ ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (ret)
return ret;
- resp.fd = ret;
- filp = ib_uverbs_alloc_event_file(file, 0);
- if (IS_ERR(filp)) {
- put_unused_fd(resp.fd);
- return PTR_ERR(filp);
+ if (cmd.comp_mask >= IB_UVERBS_EXP_REG_MR_EX_RESERVED) {
+ pr_debug("ib_uverbs_reg_mr: invalid bit in command comp_mask field\n");
+ return -EINVAL;
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- put_unused_fd(resp.fd);
- fput(filp);
- return -EFAULT;
+ if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) {
+ pr_debug("ib_uverbs_reg_mr: HCA virtual address doesn't match host address\n");
+ return -EINVAL;
}
- fd_install(resp.fd, filp);
- return in_len;
-}
-
-static ssize_t create_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len, void *vcmd, int ex,
- void __user *response)
-{
- struct ib_uverbs_create_cq *cmd;
- struct ib_uverbs_create_cq_ex *cmd_e;
- struct ib_uverbs_create_cq_resp resp;
- struct ib_udata udata;
- struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file = NULL;
- struct ib_cq *cq;
- struct ib_cq_init_attr attr;
- int cmd_sz;
- int ret;
+ ret = ib_check_mr_access(cmd.exp_access_flags);
+ if (ret)
+ return ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
- cmd = vcmd;
- cmd_e = vcmd;
- cmd_sz = ex ? sizeof(*cmd_e) : sizeof(*cmd);
- INIT_UDATA(&udata, buf + cmd_sz, response + sizeof(resp),
- in_len - sizeof(cmd), out_len - sizeof(resp));
+ init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
+ down_write(&uobj->mutex);
+
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ pr_debug("ib_uverbs_reg_mr: invalid PD\n");
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ if (cmd.exp_access_flags & IB_UVERBS_EXP_ACCESS_ON_DEMAND) {
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ struct ib_exp_device_attr exp_attr;
+ ret = ib_exp_query_device(pd->device, &exp_attr);
+ if (ret || !(exp_attr.device_cap_flags2 &
+ IB_EXP_DEVICE_ODP)) {
+ pr_debug("ib_uverbs_reg_mr: ODP requested on device without ODP support\n");
+ ret = -EINVAL;
+ goto err_put;
+ }
+#else
+ pr_debug("ib_uverbs_reg_mr: ODP requested but the RDMA subsystem was compiled without ODP support\n");
+ ret = -EINVAL;
+ goto err_put;
+#endif
+ }
+
+ /* We first get a new "obj id" to be passed later to reg mr for
+ * further use as mr_id.
+ */
+ ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
+ if (ret)
+ goto err_put;
+
+ access_flags = translate_exp_access_flags(cmd.exp_access_flags);
+ mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
+ access_flags, uhw, uobj->id);
+ if (IS_ERR(mr)) {
+ ret = PTR_ERR(mr);
+ goto err_remove_uobj;
+ }
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
+
+ uobj->object = mr;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+ resp.mr_handle = uobj->id;
+
+ ret = ib_copy_to_udata(ucore, &resp, sizeof(resp));
+ if (ret)
+ goto err_copy;
+
+ put_pd_read(pd);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->mr_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+
+ return 0;
+
+err_copy:
+ ib_dereg_mr(mr);
+
+err_remove_uobj:
+ idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
+
+err_put:
+ put_pd_read(pd);
+
+err_free:
+ put_uobj_write(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_comp_channel cmd;
+ struct ib_uverbs_create_comp_channel_resp resp;
+ struct file *filp;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ ret = get_unused_fd_flags(O_CLOEXEC);
+ if (ret < 0)
+ return ret;
+ resp.fd = ret;
+
+ filp = ib_uverbs_alloc_event_file(file, 0);
+ if (IS_ERR(filp)) {
+ put_unused_fd(resp.fd);
+ return PTR_ERR(filp);
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ put_unused_fd(resp.fd);
+ fput(filp);
+ return -EFAULT;
+ }
+
+ /* Take a reference on uverbs_file so it cannot be freed until this
+ * event file is closed. This keeps the uverbs_device fields reachable
+ * while the event file is torn down, guaranteeing the uverbs device is
+ * still available at that point.
+ * Note: the same is already done for the async event file.
+ */
+ kref_get(&file->ref);
+ fd_install(resp.fd, filp);
+ return in_len;
+}
+
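+/*
+ * Shared by the basic and experimental create_cq paths: the caller copies
+ * the command into vcmd and builds the udata, with ex selecting the
+ * experimental command layout.
+ */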
+static ssize_t create_cq(struct ib_uverbs_file *file,
+ int in_len,
+ int out_len, void *vcmd, int ex,
+ void __user *response, struct ib_udata *udata)
+{
+ struct ib_uverbs_create_cq *cmd;
+ struct ib_uverbs_exp_create_cq *cmd_e;
+ struct ib_uverbs_create_cq_resp resp;
+ struct ib_ucq_object *obj;
+ struct ib_uverbs_event_file *ev_file = NULL;
+ struct ib_cq *cq;
+ struct ib_cq_init_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ cmd = vcmd;
+ cmd_e = vcmd;
if (cmd->comp_vector >= file->device->num_comp_vectors)
return -EINVAL;
@@ -1316,10 +1681,10 @@
memset(&attr, 0, sizeof(attr));
attr.cqe = cmd->cqe;
attr.comp_vector = cmd->comp_vector;
- if (ex && (cmd_e->comp_mask & IB_UVERBS_CREATE_CQ_EX_CAP_FLAGS))
+ if (ex && (cmd_e->comp_mask & IB_UVERBS_EXP_CREATE_CQ_CAP_FLAGS))
attr.flags = cmd_e->create_flags;
cq = file->device->ib_dev->create_cq(file->device->ib_dev, &attr,
- file->ucontext, &udata);
+ file->ucontext, udata);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_file;
@@ -1372,17 +1737,23 @@
}
ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
- struct ib_uverbs_create_cq cmd;
+ struct ib_uverbs_create_cq cmd;
+ struct ib_udata udata;
+ struct ib_uverbs_create_cq_resp resp;
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- return create_cq(file, buf, in_len, out_len, &cmd,
- IB_USER_VERBS_CMD_BASIC,
- (void __user *) (unsigned long) cmd.response);
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ return create_cq(file, in_len, out_len, &cmd,
+ IB_USER_VERBS_CMD_BASIC,
+ (void __user *) (unsigned long) cmd.response,
+ &udata);
}
ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
@@ -1449,8 +1820,8 @@
}
ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_poll_cq cmd;
struct ib_uverbs_poll_cq_resp resp;
@@ -1458,7 +1829,7 @@
u8 __user *data_ptr;
struct ib_cq *cq;
struct ib_wc wc;
- int ret;
+ int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -1577,7 +1948,7 @@
const char __user *buf, int in_len,
int out_len)
{
- void __user *response;
+ void __user *response;
struct ib_udata udata;
struct ib_uqp_object *obj;
struct ib_device *device;
@@ -1587,43 +1958,50 @@
struct ib_cq *scq = NULL, *rcq = NULL;
struct ib_srq *srq = NULL;
struct ib_qp *qp;
- struct ib_qp_init_attr attr;
+ struct ib_qp_init_attr *attr;
int ret;
- union {
- struct ib_uverbs_create_qp basic;
- } cmd_obj;
- struct ib_uverbs_create_qp *cmd;
- size_t cmd_size = 0;
- union {
- struct ib_uverbs_create_qp_resp basic;
- } resp_obj;
- struct ib_uverbs_create_qp_resp *resp;
- size_t resp_size = 0;
-
- cmd_size = sizeof(cmd_obj.basic);
- cmd = &cmd_obj.basic;
-
- resp_size = sizeof(resp_obj.basic);
- resp = &resp_obj.basic;
-
- if (out_len < resp_size)
- return -ENOSPC;
+ struct ib_uverbs_create_qp *cmd;
+ size_t cmd_size;
+ struct ib_uverbs_create_qp_resp *resp;
+ size_t resp_size;
+
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+ resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+ if (!cmd || !attr || !resp) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
- if (copy_from_user(&cmd_obj, buf, cmd_size))
- return -EFAULT;
+ cmd_size = sizeof(*cmd);
+ resp_size = sizeof(*resp);
- response = (void __user *) (unsigned long) cmd->response;
+ if (out_len < resp_size) {
+ ret = -ENOSPC;
+ goto err_free;
+ }
+
+ if (copy_from_user(cmd, buf, cmd_size)) {
+ ret = -EFAULT;
+ goto err_free;
+ }
+
+ response = (void __user *) (unsigned long) cmd->response;
if (!disable_raw_qp_enforcement &&
- cmd->qp_type == IB_QPT_RAW_PACKET && priv_check(curthread, PRIV_NET_RAW))
- return -EPERM;
+ cmd->qp_type == IB_QPT_RAW_PACKET && priv_check(curthread, PRIV_NET_RAW)) {
+ ret = -EPERM;
+ goto err_free;
+ }
INIT_UDATA(&udata, buf + cmd_size, response + resp_size,
in_len - cmd_size, out_len - resp_size);
obj = kzalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ if (!obj) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &qp_lock_class);
down_write(&obj->uevent.uobject.mutex);
@@ -1636,24 +2014,25 @@
}
device = xrcd->device;
} else {
- if (cmd->qp_type == IB_QPT_XRC_INI) {
+ if (cmd->qp_type == IB_QPT_XRC_INI ||
+ cmd->qp_type == IB_EXP_QPT_DC_INI) {
cmd->max_recv_wr = 0;
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
srq = idr_read_srq(cmd->srq_handle, file->ucontext);
if (!srq || srq->srq_type != IB_SRQT_BASIC) {
- ret = -EINVAL;
+ ret = -EINVAL;
goto err_put;
- }
- }
+ }
+ }
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
rcq = idr_read_cq(cmd->recv_cq_handle, file->ucontext, 0);
if (!rcq) {
- ret = -EINVAL;
+ ret = -EINVAL;
goto err_put;
- }
+ }
}
}
@@ -1663,36 +2042,35 @@
if (!pd || !scq) {
ret = -EINVAL;
goto err_put;
- }
+ }
device = pd->device;
- }
+ }
- memset(&attr, 0, sizeof attr);
- attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = file;
- attr.send_cq = scq;
- attr.recv_cq = rcq;
- attr.srq = srq;
- attr.xrcd = xrcd;
- attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
- attr.qp_type = cmd->qp_type;
- attr.create_flags = 0;
-
- attr.cap.max_send_wr = cmd->max_send_wr;
- attr.cap.max_recv_wr = cmd->max_recv_wr;
- attr.cap.max_send_sge = cmd->max_send_sge;
- attr.cap.max_recv_sge = cmd->max_recv_sge;
- attr.cap.max_inline_data = cmd->max_inline_data;
+ attr->event_handler = ib_uverbs_qp_event_handler;
+ attr->qp_context = file;
+ attr->send_cq = scq;
+ attr->recv_cq = rcq;
+ attr->srq = srq;
+ attr->xrcd = xrcd;
+ attr->sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+ attr->qp_type = cmd->qp_type;
+ attr->create_flags = 0;
+
+ attr->cap.max_send_wr = cmd->max_send_wr;
+ attr->cap.max_recv_wr = cmd->max_recv_wr;
+ attr->cap.max_send_sge = cmd->max_send_sge;
+ attr->cap.max_recv_sge = cmd->max_recv_sge;
+ attr->cap.max_inline_data = cmd->max_inline_data;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
if (cmd->qp_type == IB_QPT_XRC_TGT)
- qp = ib_create_qp(pd, &attr);
+ qp = ib_create_qp(pd, attr);
else
- qp = device->create_qp(pd, &attr, &udata);
+ qp = device->create_qp(pd, attr, &udata);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
@@ -1703,19 +2081,19 @@
qp->real_qp = qp;
qp->device = device;
qp->pd = pd;
- qp->send_cq = attr.send_cq;
- qp->recv_cq = attr.recv_cq;
- qp->srq = attr.srq;
- qp->event_handler = attr.event_handler;
- qp->qp_context = attr.qp_context;
- qp->qp_type = attr.qp_type;
+ qp->send_cq = attr->send_cq;
+ qp->recv_cq = attr->recv_cq;
+ qp->srq = attr->srq;
+ qp->event_handler = attr->event_handler;
+ qp->qp_context = attr->qp_context;
+ qp->qp_type = attr->qp_type;
atomic_set(&qp->usecnt, 0);
atomic_inc(&pd->usecnt);
- atomic_inc(&attr.send_cq->usecnt);
- if (attr.recv_cq)
- atomic_inc(&attr.recv_cq->usecnt);
- if (attr.srq)
- atomic_inc(&attr.srq->usecnt);
+ atomic_inc(&attr->send_cq->usecnt);
+ if (attr->recv_cq)
+ atomic_inc(&attr->recv_cq->usecnt);
+ if (attr->srq)
+ atomic_inc(&attr->srq->usecnt);
}
qp->uobject = &obj->uevent.uobject;
@@ -1724,25 +2102,25 @@
if (ret)
goto err_destroy;
- memset(&resp_obj, 0, sizeof(resp_obj));
resp->qpn = qp->qp_num;
resp->qp_handle = obj->uevent.uobject.id;
- resp->max_recv_sge = attr.cap.max_recv_sge;
- resp->max_send_sge = attr.cap.max_send_sge;
- resp->max_recv_wr = attr.cap.max_recv_wr;
- resp->max_send_wr = attr.cap.max_send_wr;
- resp->max_inline_data = attr.cap.max_inline_data;
+ resp->max_recv_sge = attr->cap.max_recv_sge;
+ resp->max_send_sge = attr->cap.max_send_sge;
+ resp->max_recv_wr = attr->cap.max_recv_wr;
+ resp->max_send_wr = attr->cap.max_send_wr;
+ resp->max_inline_data = attr->cap.max_inline_data;
- if (copy_to_user(response, &resp_obj, resp_size)) {
- ret = -EFAULT;
+ if (copy_to_user(response, resp, resp_size)) {
+ ret = -EFAULT;
goto err_copy;
- }
+ }
if (xrcd) {
- obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+ uobject);
atomic_inc(&obj->uxrcd->refcnt);
put_xrcd_read(xrcd_uobj);
- }
+ }
if (pd)
put_pd_read(pd);
@@ -1760,6 +2138,9 @@
obj->uevent.uobject.live = 1;
up_write(&obj->uevent.uobject.mutex);
+ kfree(attr);
+ kfree(cmd);
+ kfree(resp);
return in_len;
@@ -1782,6 +2163,11 @@
put_srq_read(srq);
put_uobj_write(&obj->uevent.uobject);
+
+err_free:
+ kfree(attr);
+ kfree(cmd);
+ kfree(resp);
return ret;
}
@@ -1834,7 +2220,7 @@
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_put;
- }
+ }
qp->uobject = &obj->uevent.uobject;
@@ -1862,6 +2248,7 @@
mutex_unlock(&file->mutex);
obj->uevent.uobject.live = 1;
+
up_write(&obj->uevent.uobject.mutex);
return in_len;
@@ -1879,8 +2266,8 @@
}
ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_query_qp cmd;
struct ib_uverbs_query_qp_resp resp;
@@ -1992,45 +2379,136 @@
}
}
-static ssize_t __uverbs_modify_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len,
- enum uverbs_cmd_type cmd_type)
+static ssize_t __uverbs_modify_qp(struct ib_uverbs_file *file, int cmd_len,
+ enum uverbs_cmd_type cmd_type,
+ struct ib_uverbs_exp_modify_qp *cmd,
+ int hw_len,
+ struct ib_udata *udata)
{
- struct ib_uverbs_modify_qp_ex cmd;
- struct ib_udata udata;
struct ib_qp *qp;
struct ib_qp_attr *attr;
- struct ib_qp_attr_ex *attrx;
- int ret;
- void *p;
- union ib_gid sgid;
- union ib_gid *dgid;
+ int ret;
u8 port_num;
+ u32 exp_mask = 0;
- if (cmd_type == IB_USER_VERBS_CMD_BASIC) {
- p = &cmd;
- p += sizeof(cmd.comp_mask);
- if (copy_from_user(p, buf,
- sizeof(struct ib_uverbs_modify_qp)))
- return -EFAULT;
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ if (!attr)
+ return -ENOMEM;
+
+ qp = idr_read_qp(cmd->qp_handle, file->ucontext);
+ if (!qp) {
+ kfree(attr);
+ return -EINVAL;
+ }
+
+ attr->qp_state = cmd->qp_state;
+ attr->cur_qp_state = cmd->cur_qp_state;
+ attr->path_mtu = cmd->path_mtu;
+ attr->path_mig_state = cmd->path_mig_state;
+ attr->qkey = cmd->qkey;
+ attr->rq_psn = cmd->rq_psn & 0xffffff;
+ attr->sq_psn = cmd->sq_psn & 0xffffff;
+ attr->dest_qp_num = cmd->dest_qp_num;
+ attr->qp_access_flags = cmd->qp_access_flags;
+ attr->pkey_index = cmd->pkey_index;
+ attr->alt_pkey_index = cmd->alt_pkey_index;
+ attr->en_sqd_async_notify = cmd->en_sqd_async_notify;
+ attr->max_rd_atomic = cmd->max_rd_atomic;
+ attr->max_dest_rd_atomic = cmd->max_dest_rd_atomic;
+ attr->min_rnr_timer = cmd->min_rnr_timer;
+ attr->port_num = cmd->port_num;
+ attr->timeout = cmd->timeout;
+ attr->retry_cnt = cmd->retry_cnt;
+ attr->rnr_retry = cmd->rnr_retry;
+ attr->alt_port_num = cmd->alt_port_num;
+ attr->alt_timeout = cmd->alt_timeout;
+ if (cmd->comp_mask & IB_UVERBS_EXP_QP_ATTR_FLOW_ENTROPY) {
+ if (offsetof(typeof(*cmd), flow_entropy) + sizeof(cmd->flow_entropy) <= cmd_len) {
+ attr->flow_entropy = cmd->flow_entropy;
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ memcpy(attr->ah_attr.grh.dgid.raw, cmd->dest.dgid, 16);
+ attr->ah_attr.grh.flow_label = cmd->dest.flow_label;
+ attr->ah_attr.grh.sgid_index = cmd->dest.sgid_index;
+ attr->ah_attr.grh.hop_limit = cmd->dest.hop_limit;
+ attr->ah_attr.grh.traffic_class = cmd->dest.traffic_class;
+ attr->ah_attr.dlid = cmd->dest.dlid;
+ attr->ah_attr.sl = cmd->dest.sl;
+ attr->ah_attr.src_path_bits = cmd->dest.src_path_bits;
+ attr->ah_attr.static_rate = cmd->dest.static_rate;
+ attr->ah_attr.ah_flags = cmd->dest.is_global ? IB_AH_GRH : 0;
+ attr->ah_attr.port_num = cmd->dest.port_num;
+
+ memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->alt_dest.dgid, 16);
+ attr->alt_ah_attr.grh.flow_label = cmd->alt_dest.flow_label;
+ attr->alt_ah_attr.grh.sgid_index = cmd->alt_dest.sgid_index;
+ attr->alt_ah_attr.grh.hop_limit = cmd->alt_dest.hop_limit;
+ attr->alt_ah_attr.grh.traffic_class = cmd->alt_dest.traffic_class;
+ attr->alt_ah_attr.dlid = cmd->alt_dest.dlid;
+ attr->alt_ah_attr.sl = cmd->alt_dest.sl;
+ attr->alt_ah_attr.src_path_bits = cmd->alt_dest.src_path_bits;
+ attr->alt_ah_attr.static_rate = cmd->alt_dest.static_rate;
+ attr->alt_ah_attr.ah_flags = cmd->alt_dest.is_global ? IB_AH_GRH : 0;
+ attr->alt_ah_attr.port_num = cmd->alt_dest.port_num;
+ port_num = (cmd->attr_mask & IB_QP_PORT) ? cmd->port_num : qp->port_num;
+
+ if (cmd_type == IB_USER_VERBS_CMD_EXP) {
+ exp_mask = cmd->exp_attr_mask & IBV_EXP_QP_ATTR_MASK;
+ attr->dct_key = cmd->dct_key;
+ }
+
+ if (qp->real_qp == qp) {
+ ret = ib_resolve_eth_dmac(qp, attr, &cmd->attr_mask);
+ if (ret)
+ goto out;
+ ret = qp->device->modify_qp(qp, attr,
+ modify_qp_mask(qp->qp_type, cmd->attr_mask | exp_mask), udata);
+ if (!ret && (cmd->attr_mask & IB_QP_PORT))
+ qp->port_num = attr->port_num;
} else {
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd->attr_mask | exp_mask));
}
+ if (ret)
+ goto out;
+
+ ret = cmd_len + hw_len;
+
+out:
+ put_qp_read(qp);
+ kfree(attr);
+
+ return ret;
+}
+
+ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_modify_qp cmd;
+ struct ib_udata udata;
+ struct ib_qp *qp;
+ struct ib_qp_attr *attr;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
- attrx = kzalloc(sizeof(*attrx), GFP_KERNEL);
- if (!attrx)
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ if (!attr)
return -ENOMEM;
- attr = (struct ib_qp_attr *)attrx;
qp = idr_read_qp(cmd.qp_handle, file->ucontext);
if (!qp) {
- kfree(attrx);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
attr->qp_state = cmd.qp_state;
@@ -2078,77 +2556,38 @@
attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
- port_num = (cmd.attr_mask & IB_QP_PORT) ? cmd.port_num : qp->port_num;
- if ((cmd.attr_mask & IB_QP_AV) && port_num &&
- (rdma_port_get_link_layer(qp->device, port_num) ==
- IB_LINK_LAYER_ETHERNET)) {
- ret = ib_query_gid(qp->device, port_num,
- attr->ah_attr.grh.sgid_index, &sgid);
- if (ret)
- goto out;
- dgid = &attr->ah_attr.grh.dgid;
- if (rdma_link_local_addr((struct in6_addr *)dgid->raw)) {
- rdma_get_ll_mac((struct in6_addr *)dgid->raw,
- attr->ah_attr.dmac);
- rdma_get_ll_mac((struct in6_addr *)sgid.raw,
- attr->smac);
- attr->vlan_id = rdma_get_vlan_id(&sgid);
- } else {
- ret = rdma_addr_find_dmac_by_grh(&sgid, dgid,
- attr->ah_attr.dmac,
- &attr->vlan_id, -1U);
- if (ret)
- goto out;
- ret = rdma_addr_find_smac_by_sgid(&sgid, attr->smac,
- NULL, -1U);
- if (ret)
- goto out;
- }
- cmd.attr_mask |= IB_QP_SMAC;
- if (attr->vlan_id < 0xFFFF)
- cmd.attr_mask |= IB_QP_VID;
- }
- if (cmd_type == IB_USER_VERBS_CMD_EXTENDED) {
- if (cmd.comp_mask & IB_UVERBS_QP_ATTR_DCT_KEY)
- attrx->dct_key = cmd.dct_key;
- }
if (qp->real_qp == qp) {
+ ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
+ if (ret)
+ goto release_qp;
ret = qp->device->modify_qp(qp, attr,
modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
- if (!ret && (cmd.attr_mask & IB_QP_PORT))
- qp->port_num = attr->port_num;
} else {
ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
}
if (ret)
- goto out;
+ goto release_qp;
ret = in_len;
-out:
+release_qp:
put_qp_read(qp);
- kfree(attrx);
+
+out:
+ kfree(attr);
return ret;
}
-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
-{
- return __uverbs_modify_qp(file, buf, in_len, out_len,
- IB_USER_VERBS_CMD_BASIC);
-}
-
ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_destroy_qp cmd;
struct ib_uverbs_destroy_qp_resp resp;
- struct ib_uobject *uobj;
+ struct ib_uobject *uobj;
struct ib_qp *qp;
struct ib_uqp_object *obj;
int ret = -EINVAL;
@@ -2201,14 +2640,14 @@
}
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_post_send cmd;
struct ib_uverbs_post_send_resp resp;
struct ib_uverbs_send_wr *user_wr;
struct ib_send_wr *wr = NULL, *last, *next, *bad_wr;
- struct ib_qp *qp;
+ struct ib_qp *qp;
int i, sg_ind;
int is_ud;
ssize_t ret = -EINVAL;
@@ -2251,13 +2690,13 @@
user_wr->num_sge * sizeof (struct ib_sge),
GFP_KERNEL);
if (!next) {
- ret = -ENOMEM;
- goto out_put;
- }
+ ret = -ENOMEM;
+ goto out_put;
+ }
if (!last)
wr = next;
- else
+ else
last->next = next;
last = next;
@@ -2272,10 +2711,13 @@
file->ucontext);
if (!next->wr.ud.ah) {
ret = -EINVAL;
- goto out_put;
+ goto out_put;
}
next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+ if (next->opcode == IB_WR_SEND_WITH_IMM)
+ next->ex.imm_data =
+ (__be32 __force) user_wr->ex.imm_data;
} else {
switch (next->opcode) {
case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -2332,7 +2774,7 @@
for (next = wr; next; next = next->next) {
++resp.bad_wr;
if (next == bad_wr)
- break;
+ break;
}
if (copy_to_user((void __user *) (unsigned long) cmd.response,
@@ -2366,7 +2808,7 @@
struct ib_recv_wr *wr = NULL, *last, *next;
int sg_ind;
int i;
- int ret;
+ int ret;
if (in_len < wqe_size * wr_count +
sge_count * sizeof (struct ib_uverbs_sge))
@@ -2389,9 +2831,9 @@
}
if (user_wr->num_sge + sg_ind > sge_count) {
- ret = -EINVAL;
- goto err;
- }
+ ret = -EINVAL;
+ goto err;
+ }
next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
user_wr->num_sge * sizeof (struct ib_sge),
@@ -2399,7 +2841,7 @@
if (!next) {
ret = -ENOMEM;
goto err;
- }
+ }
if (!last)
wr = next;
@@ -2540,8 +2982,8 @@
}
ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_create_ah cmd;
struct ib_uverbs_create_ah_resp resp;
@@ -2564,7 +3006,7 @@
init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
down_write(&uobj->mutex);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err;
@@ -2580,6 +3022,7 @@
attr.grh.sgid_index = cmd.attr.grh.sgid_index;
attr.grh.hop_limit = cmd.attr.grh.hop_limit;
attr.grh.traffic_class = cmd.attr.grh.traffic_class;
+ memset(&attr.dmac, 0, sizeof(attr.dmac));
memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
ah = ib_create_ah(pd, &attr);
@@ -2635,7 +3078,7 @@
struct ib_uverbs_destroy_ah cmd;
struct ib_ah *ah;
struct ib_uobject *uobj;
- int ret;
+ int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -2713,14 +3156,14 @@
}
ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_detach_mcast cmd;
struct ib_uqp_object *obj;
struct ib_qp *qp;
struct ib_uverbs_mcast_entry *mcast;
- int ret = -EINVAL;
+ int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -2789,7 +3232,7 @@
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
- }
+ }
attr.event_handler = ib_uverbs_srq_event_handler;
attr.srq_context = file;
@@ -2883,13 +3326,13 @@
}
ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_create_srq cmd;
struct ib_uverbs_create_xsrq xcmd;
struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
+ struct ib_udata udata;
int ret;
if (out_len < sizeof resp)
@@ -2997,7 +3440,7 @@
put_srq_read(srq);
if (ret)
- return ret;
+ return ret;
memset(&resp, 0, sizeof resp);
@@ -3013,8 +3456,8 @@
}
ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_destroy_srq cmd;
struct ib_uverbs_destroy_srq_resp resp;
@@ -3069,19 +3512,60 @@
return ret ? ret : in_len;
}
-ssize_t ib_uverbs_exp_create_dct(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
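+/*
+ * Extended query_device: reuses the basic attribute copy and wraps it in a
+ * response carrying comp_mask and response_length for extensibility.
+ */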
+int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_query_device_resp resp;
+ struct ib_uverbs_ex_query_device cmd;
+ struct ib_device_attr attr;
+ struct ib_device *device;
+ int err;
+
+ device = file->device->ib_dev;
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
+
+ err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (err)
+ return err;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ if (cmd.reserved)
+ return -EINVAL;
+
+ resp.response_length = sizeof(resp);
+
+ if (ucore->outlen < resp.response_length)
+ return -ENOSPC;
+
+ err = device->query_device(device, &attr);
+ if (err)
+ return err;
+
+ copy_query_dev_fields(file, &resp.base, &attr);
+ resp.comp_mask = 0;
+
+ err = ib_copy_to_udata(ucore, &resp, resp.response_length);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int ib_uverbs_exp_create_dct(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
{
- int in_len = ucore->inlen + uhw->inlen;
int out_len = ucore->outlen + uhw->outlen;
- struct ib_uverbs_create_dct cmd;
+ struct ib_uverbs_create_dct *cmd;
struct ib_uverbs_create_dct_resp resp;
- struct ib_udata udata;
struct ib_udct_object *obj;
struct ib_dct *dct;
int ret;
- struct ib_dct_init_attr attr;
+ struct ib_dct_init_attr *attr;
struct ib_pd *pd = NULL;
struct ib_cq *cq = NULL;
struct ib_srq *srq = NULL;
@@ -3089,66 +3573,82 @@
if (out_len < sizeof(resp))
return -ENOSPC;
- ret = ucore->ops->copy_from(&cmd, ucore, sizeof(cmd));
+ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ if (!attr || !cmd) {
+ ret = -ENOMEM;
+ goto err_cmd_attr;
+ }
+
+ ret = ucore->ops->copy_from(cmd, ucore, sizeof(*cmd));
if (ret)
- return ret;
+ goto err_cmd_attr;
obj = kmalloc(sizeof(*obj), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ if (!obj) {
+ ret = -ENOMEM;
+ goto err_cmd_attr;
+ }
- init_uobj(&obj->uobject, cmd.user_handle, file->ucontext,
+ init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
&dct_lock_class);
- down_write(&obj->uobject.mutex);
+ down_write(&obj->uevent.uobject.mutex);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = idr_read_pd(cmd->pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_pd;
}
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
if (!cq) {
ret = -EINVAL;
goto err_put;
}
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = idr_read_srq(cmd->srq_handle, file->ucontext);
if (!srq) {
ret = -EINVAL;
goto err_put;
}
- attr.cq = cq;
- attr.access_flags = cmd.access_flags;
- attr.min_rnr_timer = cmd.min_rnr_timer;
- attr.srq = srq;
- attr.tclass = cmd.tclass;
- attr.flow_label = cmd.flow_label;
- attr.dc_key = cmd.dc_key;
- attr.mtu = cmd.mtu;
- attr.port = cmd.port;
- attr.pkey_index = cmd.pkey_index;
- attr.gid_index = cmd.gid_index;
- attr.hop_limit = cmd.hop_limit;
- attr.create_flags = cmd.create_flags;
-
- dct = ib_create_dct(pd, &attr, &udata);
+ if (cmd->create_flags & ~IB_DCT_CREATE_FLAGS_MASK) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ attr->cq = cq;
+ attr->access_flags = cmd->access_flags;
+ attr->min_rnr_timer = cmd->min_rnr_timer;
+ attr->srq = srq;
+ attr->tclass = cmd->tclass;
+ attr->flow_label = cmd->flow_label;
+ attr->dc_key = cmd->dc_key;
+ attr->mtu = cmd->mtu;
+ attr->port = cmd->port;
+ attr->pkey_index = cmd->pkey_index;
+ attr->gid_index = cmd->gid_index;
+ attr->hop_limit = cmd->hop_limit;
+ attr->create_flags = cmd->create_flags;
+
+ obj->uevent.events_reported = 0;
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ dct = ib_create_dct(pd, attr, uhw);
if (IS_ERR(dct)) {
ret = PTR_ERR(dct);
goto err_put;
}
dct->device = file->device->ib_dev;
- dct->uobject = &obj->uobject;
+ dct->uobject = &obj->uevent.uobject;
- obj->uobject.object = dct;
- ret = idr_add_uobj(&ib_uverbs_dct_idr, &obj->uobject);
+ obj->uevent.uobject.object = dct;
+ ret = idr_add_uobj(&ib_uverbs_dct_idr, &obj->uevent.uobject);
if (ret)
goto err_dct;
memset(&resp, 0, sizeof(resp));
- resp.dct_handle = obj->uobject.id;
+ resp.dct_handle = obj->uevent.uobject.id;
resp.dctn = dct->dct_num;
ret = ucore->ops->copy_to(ucore, &resp, sizeof(resp));
@@ -3156,21 +3656,23 @@
goto err_copy;
mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->dct_list);
+ list_add_tail(&obj->uevent.uobject.list, &file->ucontext->dct_list);
mutex_unlock(&file->mutex);
- obj->uobject.live = 1;
+ obj->uevent.uobject.live = 1;
put_srq_read(srq);
put_cq_read(cq);
put_pd_read(pd);
- up_write(&obj->uobject.mutex);
+ up_write(&obj->uevent.uobject.mutex);
+ kfree(attr);
+ kfree(cmd);
- return in_len;
+ return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_dct_idr, &obj->uobject);
+ idr_remove_uobj(&ib_uverbs_dct_idr, &obj->uevent.uobject);
err_dct:
ib_destroy_dct(dct);
@@ -3185,21 +3687,24 @@
put_pd_read(pd);
err_pd:
- put_uobj_write(&obj->uobject);
+ put_uobj_write(&obj->uevent.uobject);
+
+err_cmd_attr:
+ kfree(attr);
+ kfree(cmd);
return ret;
}
-ssize_t ib_uverbs_exp_destroy_dct(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+int ib_uverbs_exp_destroy_dct(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
{
- int in_len = ucore->inlen + uhw->inlen;
int out_len = ucore->outlen + uhw->outlen;
struct ib_uverbs_destroy_dct cmd;
struct ib_uverbs_destroy_dct_resp resp;
- struct ib_uobject *uobj;
- struct ib_dct *dct;
+ struct ib_uobject *uobj;
struct ib_udct_object *obj;
+ struct ib_dct *dct;
int ret;
if (out_len < sizeof(resp))
@@ -3209,12 +3714,12 @@
if (ret)
return ret;
- uobj = idr_write_uobj(&ib_uverbs_dct_idr, cmd.user_handle, file->ucontext);
+ uobj = idr_write_uobj(&ib_uverbs_dct_idr, cmd.dct_handle, file->ucontext);
if (!uobj)
return -EINVAL;
dct = uobj->object;
- obj = container_of(dct->uobject, struct ib_udct_object, uobject);
+ obj = container_of(uobj, struct ib_udct_object, uevent.uobject);
ret = ib_destroy_dct(dct);
if (!ret)
@@ -3232,6 +3737,7 @@
mutex_unlock(&file->mutex);
memset(&resp, 0, sizeof(resp));
+ resp.events_reported = obj->uevent.events_reported;
put_uobj(uobj);
@@ -3239,14 +3745,45 @@
if (ret)
return ret;
- return in_len;
+ return 0;
+}
+
+int ib_uverbs_exp_arm_dct(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ int out_len = ucore->outlen + uhw->outlen;
+ struct ib_uverbs_arm_dct cmd;
+ struct ib_uverbs_arm_dct_resp resp;
+ struct ib_dct *dct;
+ int err;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ err = ucore->ops->copy_from(&cmd, ucore, sizeof(cmd));
+ if (err)
+ return err;
+
+ dct = idr_read_dct(cmd.dct_handle, file->ucontext);
+ if (!dct)
+ return -EINVAL;
+
+ err = dct->device->exp_arm_dct(dct, uhw);
+ put_dct_read(dct);
+ if (err)
+ return err;
+
+ memset(&resp, 0, sizeof(resp));
+ err = ucore->ops->copy_to(ucore, &resp, sizeof(resp));
+
+ return err;
}
-ssize_t ib_uverbs_exp_query_dct(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+int ib_uverbs_exp_query_dct(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
{
- int in_len = ucore->inlen + uhw->inlen;
int out_len = ucore->outlen + uhw->outlen;
struct ib_uverbs_query_dct cmd;
struct ib_uverbs_query_dct_resp resp;
@@ -3287,7 +3824,7 @@
resp.flow_label = attr->flow_label;
resp.key_violations = attr->key_violations;
resp.port = attr->port;
- resp.min_rnr_timer = attr->min_rnr_timer;
+ resp.min_rnr_timer = attr->min_rnr_timer;
resp.tclass = attr->tclass;
resp.mtu = attr->mtu;
resp.pkey_index = attr->pkey_index;
@@ -3300,23 +3837,29 @@
out:
kfree(attr);
- return err ? err : in_len;
+ return err;
}
-
/*
* Experimental functions
*/
-static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
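+/*
+ * Copy a user flow spec into the kernel representation, rejecting size
+ * mismatches; the IPv6 spec type is only accepted on the experimental path.
+ */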
+static int kern_spec_to_ib_spec(struct ib_uverbs_exp_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec,
+ int is_exp)
{
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ if (!is_exp && kern_spec->type == IB_FLOW_SPEC_IPV6)
+ return -EINVAL;
+
ib_spec->type = kern_spec->type;
switch (ib_spec->type) {
case IB_FLOW_SPEC_ETH:
ib_spec->eth.size = sizeof(struct ib_flow_spec_eth);
+ if (ib_spec->eth.size != kern_spec->eth.size)
+ return -EINVAL;
memcpy(&ib_spec->eth.val, &kern_spec->eth.val,
sizeof(struct ib_flow_eth_filter));
memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask,
@@ -3331,14 +3874,27 @@
break;
case IB_FLOW_SPEC_IPV4:
ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4);
+ if (ib_spec->ipv4.size != kern_spec->ipv4.size)
+ return -EINVAL;
memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val,
sizeof(struct ib_flow_ipv4_filter));
memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
sizeof(struct ib_flow_ipv4_filter));
break;
+ case IB_FLOW_SPEC_IPV6:
+ ib_spec->ipv6.size = sizeof(struct ib_flow_spec_ipv6);
+ if (ib_spec->ipv6.size != kern_spec->ipv6.size)
+ return -EINVAL;
+ memcpy(&ib_spec->ipv6.val, &kern_spec->ipv6.val,
+ sizeof(struct ib_flow_ipv6_filter));
+ memcpy(&ib_spec->ipv6.mask, &kern_spec->ipv6.mask,
+ sizeof(struct ib_flow_ipv6_filter));
+ break;
case IB_FLOW_SPEC_TCP:
case IB_FLOW_SPEC_UDP:
ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
+ if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size)
+ return -EINVAL;
memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val,
sizeof(struct ib_flow_tcp_udp_filter));
memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask,
@@ -3350,21 +3906,22 @@
return 0;
}
-int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+int ib_uverbs_exp_create_wq(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
{
- struct ib_uverbs_create_flow cmd;
- struct ib_uverbs_create_flow_resp resp;
- struct ib_uobject *uobj;
- struct ib_flow *flow_id;
- struct ib_uverbs_flow_attr *kern_flow_attr;
- struct ib_flow_attr *flow_attr;
- struct ib_qp *qp;
+ struct ib_uverbs_exp_create_wq cmd;
+ struct ib_uverbs_exp_create_wq_resp resp;
+ struct ib_uobject *uobj;
int err = 0;
- void *kern_spec;
- void *ib_spec;
- int i;
+ struct ib_cq *cq;
+ struct ib_pd *pd;
+ struct ib_wq *wq;
+ struct ib_srq *srq = NULL;
+ struct ib_wq_init_attr wq_init_attr;
+
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
if (ucore->outlen < sizeof(resp))
return -ENOSPC;
@@ -3373,114 +3930,529 @@
if (err)
return err;
- ucore->inbuf += sizeof(cmd);
- ucore->inlen -= sizeof(cmd);
-
- if (cmd.comp_mask)
- return -EINVAL;
-
- if (priv_check(curthread, PRIV_NET_RAW) && !disable_raw_qp_enforcement)
- return -EPERM;
-
- if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+ if (cmd.reserved)
return -EINVAL;
- if (cmd.flow_attr.size > ucore->inlen ||
- cmd.flow_attr.size >
- (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
+ if (cmd.comp_mask)
return -EINVAL;
- if (cmd.flow_attr.num_of_specs) {
- kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) +
- cmd.flow_attr.size, GFP_KERNEL);
- if (!kern_flow_attr)
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj)
return -ENOMEM;
- memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
- err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
- cmd.flow_attr.size);
- if (err)
- goto err_free_attr;
- } else {
- kern_flow_attr = &cmd.flow_attr;
- }
-
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
- goto err_free_attr;
- }
- init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
+ init_uobj(uobj, cmd.user_handle, file->ucontext, &wq_lock_class);
down_write(&uobj->mutex);
-
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
- if (!qp) {
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size,
- GFP_KERNEL);
- if (!flow_attr) {
- err = -ENOMEM;
- goto err_put;
+ cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ if (!cq) {
+ err = -EINVAL;
+ goto err_put_pd;
}
- flow_attr->type = kern_flow_attr->type;
- flow_attr->priority = kern_flow_attr->priority;
- flow_attr->num_of_specs = kern_flow_attr->num_of_specs;
- flow_attr->port = kern_flow_attr->port;
- flow_attr->flags = kern_flow_attr->flags;
- flow_attr->size = sizeof(*flow_attr);
-
- kern_spec = kern_flow_attr + 1;
- ib_spec = flow_attr + 1;
- for (i = 0; i < flow_attr->num_of_specs &&
- cmd.flow_attr.size >
- offsetof(struct ib_uverbs_flow_spec, reserved) &&
- cmd.flow_attr.size >=
- ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
- err = kern_spec_to_ib_spec(kern_spec, ib_spec);
- if (err)
- goto err_free;
- flow_attr->size +=
- ((union ib_flow_spec *)ib_spec)->size;
- cmd.flow_attr.size -=
- ((struct ib_uverbs_flow_spec *)kern_spec)->size;
- kern_spec += ((struct ib_uverbs_flow_spec *)kern_spec)->size;
- ib_spec += ((union ib_flow_spec *)ib_spec)->size;
- }
- if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
- pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
- i, cmd.flow_attr.size);
- goto err_free;
+ if (cmd.wq_type == IB_WQT_SRQ) {
+ cmd.max_recv_wr = 0;
+ cmd.max_recv_sge = 0;
+ srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ if (!srq || srq->srq_type != IB_SRQT_BASIC) {
+ err = -EINVAL;
+ goto err_put_srq;
}
- flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
- if (IS_ERR(flow_id)) {
- err = PTR_ERR(flow_id);
- goto err_free;
}
- flow_id->qp = qp;
- flow_id->uobject = uobj;
- uobj->object = flow_id;
- err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
+ memset(&wq_init_attr, 0, sizeof(wq_init_attr));
+ wq_init_attr.cq = cq;
+ wq_init_attr.max_recv_sge = cmd.max_recv_sge;
+ wq_init_attr.max_recv_wr = cmd.max_recv_wr;
+ wq_init_attr.srq = srq;
+ wq_init_attr.wq_context = file;
+ wq_init_attr.wq_type = cmd.wq_type;
+ wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
+
+ if (IS_ERR(wq)) {
+ err = PTR_ERR(wq);
+ goto err_put_srq;
+ }
+
+ wq->uobject = uobj;
+ uobj->object = wq;
+ wq->wq_type = wq_init_attr.wq_type;
+ wq->cq = cq;
+ wq->pd = pd;
+ wq->srq = srq;
+ wq->device = pd->device;
+ wq->wq_context = wq_init_attr.wq_context;
+ atomic_set(&wq->usecnt, 0);
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&cq->usecnt);
+ if (srq)
+ atomic_inc(&srq->usecnt);
+
+ err = idr_add_uobj(&ib_uverbs_wq_idr, uobj);
if (err)
- goto destroy_flow;
+ goto destroy_wq;
memset(&resp, 0, sizeof(resp));
- resp.flow_handle = uobj->id;
-
+ resp.wq_handle = uobj->id;
+ resp.max_recv_sge = wq_init_attr.max_recv_sge;
+ resp.max_recv_wr = wq_init_attr.max_recv_wr;
+ resp.wqn = wq->wq_num;
+ resp.response_length = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
err = ib_copy_to_udata(ucore,
&resp, sizeof(resp));
if (err)
goto err_copy;
- put_qp_read(qp);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rule_list);
- mutex_unlock(&file->mutex);
-
+ put_pd_read(pd);
+ put_cq_read(cq);
+ if (srq)
+ put_srq_read(srq);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->wq_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+
+ return 0;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
+destroy_wq:
+ ib_destroy_wq(wq);
+err_put_srq:
+ if (srq)
+ put_srq_read(srq);
+ put_cq_read(cq);
+err_put_pd:
+ put_pd_read(pd);
+err_uobj:
+ put_uobj_write(uobj);
+
+ return err;
+}
+
+int ib_uverbs_exp_destroy_wq(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_exp_destroy_wq cmd;
+ struct ib_wq *wq;
+ struct ib_uobject *uobj;
+ int ret;
+
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
+
+ ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle,
+ file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+
+ wq = uobj->object;
+ ret = ib_destroy_wq(wq);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+ if (ret)
+ return ret;
+
+ idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+ return ret;
+}
+
+int ib_uverbs_exp_modify_wq(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_exp_modify_wq cmd;
+ struct ib_wq *wq;
+ int ret;
+ struct ib_wq_attr wq_attr;
+ enum ib_wq_attr_mask attr_mask;
+
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
+
+ ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
+
+ if (!cmd.comp_mask)
+ return -EINVAL;
+
+ attr_mask = cmd.comp_mask;
+ wq = idr_read_wq(cmd.wq_handle, file->ucontext);
+ if (!wq)
+ return -EINVAL;
+
+ memset(&wq_attr, 0, sizeof(wq_attr));
+ wq_attr.curr_wq_state = cmd.curr_wq_state;
+ wq_attr.wq_state = cmd.wq_state;
+ ret = wq->device->modify_wq(wq, &wq_attr, attr_mask, uhw);
+ put_wq_read(wq);
+ return ret;
+}
+
+int ib_uverbs_exp_create_rwq_ind_table(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_exp_create_rwq_ind_table cmd;
+ struct ib_uverbs_exp_create_rwq_ind_table_resp resp;
+ struct ib_uobject *uobj;
+ int err = 0;
+ struct ib_pd *pd;
+ struct ib_rwq_ind_table_init_attr init_attr;
+ struct ib_rwq_ind_table *rwq_ind_tbl;
+ struct ib_wq **wqs = NULL;
+ u32 *wqs_handles = NULL;
+ struct ib_wq *wq = NULL;
+ int i, j, num_read_wqs;
+ u32 num_wq_handles;
+ u32 expected_in_size;
+
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
+
+ if (ucore->outlen < sizeof(resp))
+ return -ENOSPC;
+
+ err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (err)
+ return err;
+
+ ucore->inbuf += sizeof(cmd);
+ ucore->inlen -= sizeof(cmd);
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ if (cmd.reserved)
+ return -EINVAL;
+
+ num_wq_handles = 1 << cmd.log_ind_tbl_size;
+ expected_in_size = num_wq_handles * sizeof(__u32);
+ if (num_wq_handles == 1)
+ /* input size for wq handles is u64 aligned */
+ expected_in_size += sizeof(__u32);
+
+ if (ucore->inlen != expected_in_size)
+ return -EINVAL;
+
+ wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles),
+ GFP_KERNEL);
+ if (!wqs_handles)
+ return -ENOMEM;
+
+ err = ib_copy_from_udata(wqs_handles, ucore,
+ num_wq_handles * sizeof(__u32));
+ if (err)
+ goto err_free;
+
+ wqs = kcalloc(num_wq_handles, sizeof(*wqs), GFP_KERNEL);
+ if (!wqs) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
+ num_read_wqs++) {
+ wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext);
+ if (!wq) {
+ err = -EINVAL;
+ goto put_wqs;
+ }
+
+ wqs[num_read_wqs] = wq;
+ }
+
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj) {
+ err = -ENOMEM;
+ goto put_wqs;
+ }
+
+ init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class);
+ down_write(&uobj->mutex);
+
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ err = -EINVAL;
+ goto err_uobj;
+ }
+
+ memset(&init_attr, 0, sizeof(init_attr));
+ init_attr.pd = pd;
+ init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
+ init_attr.ind_tbl = wqs;
+ rwq_ind_tbl = pd->device->create_rwq_ind_table(pd->device, &init_attr, uhw);
+
+ if (IS_ERR(rwq_ind_tbl)) {
+ err = PTR_ERR(rwq_ind_tbl);
+ goto put_pd;
+ }
+
+ rwq_ind_tbl->ind_tbl = wqs;
+ rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size;
+ rwq_ind_tbl->uobject = uobj;
+ uobj->object = rwq_ind_tbl;
+ rwq_ind_tbl->device = pd->device;
+ rwq_ind_tbl->pd = pd;
+ atomic_set(&rwq_ind_tbl->usecnt, 0);
+
+ for (i = 0; i < num_wq_handles; i++)
+ atomic_inc(&wqs[i]->usecnt);
+
+ err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
+ if (err)
+ goto destroy_ind_tbl;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.ind_tbl_handle = uobj->id;
+ resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
+ resp.response_length = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
+
+ err = ib_copy_to_udata(ucore,
+ &resp, sizeof(resp));
+ if (err)
+ goto err_copy;
+
+ kfree(wqs_handles);
+ put_pd_read(pd);
+
+ for (j = 0; j < num_read_wqs; j++)
+ put_wq_read(wqs[j]);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+ return 0;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
+destroy_ind_tbl:
+ ib_destroy_rwq_ind_table(rwq_ind_tbl);
+put_pd:
+ put_pd_read(pd);
+err_uobj:
+ put_uobj_write(uobj);
+put_wqs:
+ for (j = 0; j < num_read_wqs; j++)
+ put_wq_read(wqs[j]);
+err_free:
+ kfree(wqs_handles);
+ kfree(wqs);
+ return err;
+}
+
+int ib_uverbs_exp_destroy_rwq_ind_table(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_exp_destroy_rwq_ind_table cmd;
+ struct ib_rwq_ind_table *rwq_ind_tbl;
+ struct ib_uobject *uobj;
+ int ret;
+ struct ib_wq **ind_tbl;
+
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
+
+ ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle,
+ file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+ rwq_ind_tbl = uobj->object;
+ ind_tbl = rwq_ind_tbl->ind_tbl;
+
+ ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ return ret;
+
+ idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+ kfree(ind_tbl);
+ return ret;
+}
+
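+/*
+ * Shared body for the regular and experimental create_flow commands;
+ * is_exp selects the larger experimental flow_spec layout and permits the
+ * IPv6 spec type.
+ */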
+static int common_create_flow(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw,
+ bool is_exp)
+{
+ struct ib_uverbs_create_flow cmd;
+ struct ib_uverbs_create_flow_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_flow *flow_id;
+ struct ib_uverbs_flow_attr *kern_flow_attr;
+ struct ib_flow_attr *flow_attr;
+ struct ib_qp *qp;
+ int err = 0;
+ void *kern_spec;
+ void *ib_spec;
+ int i;
+ unsigned long spec_size;
+
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
+
+ if (ucore->outlen < sizeof(resp))
+ return -ENOSPC;
+
+ err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (err)
+ return err;
+
+ ucore->inbuf += sizeof(cmd);
+ ucore->inlen -= sizeof(cmd);
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ if (priv_check(curthread, PRIV_NET_RAW) && !disable_raw_qp_enforcement)
+ return -EPERM;
+
+ if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+ return -EINVAL;
+
+ spec_size = (is_exp) ? sizeof(struct ib_uverbs_exp_flow_spec) :
+ sizeof(struct ib_uverbs_flow_spec);
+ if (cmd.flow_attr.size > ucore->inlen ||
+ cmd.flow_attr.size >
+ (cmd.flow_attr.num_of_specs * spec_size))
+ return -EINVAL;
+
+ if (cmd.flow_attr.reserved[0] ||
+ cmd.flow_attr.reserved[1])
+ return -EINVAL;
+
+ if (cmd.flow_attr.num_of_specs) {
+ kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
+ GFP_KERNEL);
+ if (!kern_flow_attr)
+ return -ENOMEM;
+
+ memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
+ err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
+ cmd.flow_attr.size);
+ if (err)
+ goto err_free_attr;
+ } else {
+ kern_flow_attr = &cmd.flow_attr;
+ }
+
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj) {
+ err = -ENOMEM;
+ goto err_free_attr;
+ }
+ init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
+ down_write(&uobj->mutex);
+
+ qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ if (!qp) {
+ err = -EINVAL;
+ goto err_uobj;
+ }
+
+ flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);
+ if (!flow_attr) {
+ err = -ENOMEM;
+ goto err_put;
+ }
+
+ flow_attr->type = kern_flow_attr->type;
+ flow_attr->priority = kern_flow_attr->priority;
+ flow_attr->num_of_specs = kern_flow_attr->num_of_specs;
+ flow_attr->port = kern_flow_attr->port;
+ flow_attr->flags = kern_flow_attr->flags;
+ flow_attr->size = sizeof(*flow_attr);
+
+ kern_spec = kern_flow_attr + 1;
+ ib_spec = flow_attr + 1;
+ for (i = 0; i < flow_attr->num_of_specs &&
+ cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec_hdr, reserved) &&
+ cmd.flow_attr.size >=
+ ((struct ib_uverbs_flow_spec_hdr *)kern_spec)->size; i++) {
+ err = kern_spec_to_ib_spec(kern_spec, ib_spec, is_exp);
+ if (err)
+ goto err_free;
+ flow_attr->size +=
+ ((union ib_flow_spec *) ib_spec)->size;
+ cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec_hdr *)kern_spec)->size;
+ kern_spec += ((struct ib_uverbs_flow_spec_hdr *)kern_spec)->size;
+ ib_spec += ((union ib_flow_spec *) ib_spec)->size;
+ }
+ if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
+ pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
+ i, cmd.flow_attr.size);
+ err = -EINVAL;
+ goto err_free;
+ }
+ flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
+ if (IS_ERR(flow_id)) {
+ err = PTR_ERR(flow_id);
+ goto err_free;
+ }
+ flow_id->qp = qp;
+ flow_id->uobject = uobj;
+ uobj->object = flow_id;
+
+ err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
+ if (err)
+ goto destroy_flow;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.flow_handle = uobj->id;
+
+ err = ib_copy_to_udata(ucore,
+ &resp, sizeof(resp));
+ if (err)
+ goto err_copy;
+
+ put_qp_read(qp);
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->rule_list);
+ mutex_unlock(&file->mutex);
+
uobj->live = 1;
up_write(&uobj->mutex);
@@ -3504,6 +4476,20 @@
return err;
}
+int ib_uverbs_exp_create_flow(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ return common_create_flow(file, ucore, uhw, true);
+}
+
+int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ return common_create_flow(file, ucore, uhw, false);
+}
+
int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
struct ib_udata *ucore,
struct ib_udata *uhw)
@@ -3513,10 +4499,16 @@
struct ib_uobject *uobj;
int ret;
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
+
ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
if (ret)
return ret;
+ if (cmd.comp_mask)
+ return -EINVAL;
+
uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
file->ucontext);
if (!uobj)
@@ -3540,45 +4532,70 @@
return ret;
}
-ssize_t ib_uverbs_exp_modify_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore, struct ib_udata *uhw)
+int ib_uverbs_exp_modify_qp(struct ib_uverbs_file *file,
+ struct ib_udata *ucore, struct ib_udata *uhw)
{
- const char __user *buf = ucore->inbuf;
- int in_len = ucore->inlen + uhw->inlen;
- int out_len = ucore->outlen + uhw->outlen;
+ struct ib_uverbs_exp_modify_qp cmd;
+ int ret;
+
+ if (ucore->inlen < offsetof(typeof(cmd), comp_mask) + sizeof(cmd.comp_mask))
+ return -EINVAL;
+
+ ret = ucore->ops->copy_from(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+
+ if (ret)
+ return ret;
+
+ if (cmd.comp_mask >= IB_UVERBS_EXP_QP_ATTR_RESERVED)
+ return -ENOSYS;
+
+ ret = __uverbs_modify_qp(file, ucore->inlen,
+ IB_USER_VERBS_CMD_EXP, &cmd, uhw->inlen, uhw);
+ if (ret < 0)
+ return ret;
- return __uverbs_modify_qp(file, buf, in_len, out_len,
- IB_USER_VERBS_CMD_EXTENDED);
+ return 0;
}
-ssize_t ib_uverbs_exp_create_cq(struct ib_uverbs_file *file,
- struct ib_udata *ucore, struct ib_udata *uhw)
+int ib_uverbs_exp_create_cq(struct ib_uverbs_file *file,
+ struct ib_udata *ucore, struct ib_udata *uhw)
{
- const char __user *buf = ucore->inbuf;
int in_len = ucore->inlen + uhw->inlen;
int out_len = ucore->outlen + uhw->outlen;
- struct ib_uverbs_create_cq_ex cmd;
+ struct ib_uverbs_exp_create_cq cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ ret = ucore->ops->copy_from(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
- return create_cq(file, buf, in_len, out_len, &cmd,
- IB_USER_VERBS_CMD_EXTENDED, ucore->outbuf);
+ if (cmd.comp_mask >= IB_UVERBS_EXP_CREATE_CQ_ATTR_RESERVED)
+ return -ENOSYS;
+
+ ret = create_cq(file, in_len, out_len, &cmd,
+ IB_USER_VERBS_CMD_EXP, ucore->outbuf, uhw);
+ if (ret < 0)
+ return ret;
+
+ return 0;
}
-ssize_t ib_uverbs_exp_modify_cq(struct ib_uverbs_file *file,
- struct ib_udata *ucore, struct ib_udata *uhw)
+int ib_uverbs_exp_modify_cq(struct ib_uverbs_file *file,
+ struct ib_udata *ucore, struct ib_udata *uhw)
{
- const char __user *buf = ucore->inbuf;
- int in_len = ucore->inlen + uhw->inlen;
- struct ib_uverbs_modify_cq_ex cmd;
+ struct ib_uverbs_exp_modify_cq cmd;
struct ib_cq *cq;
struct ib_cq_attr attr;
- int ret;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ memset(&cmd, 0, sizeof(cmd));
+ ret = ucore->ops->copy_from(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
+
+ if (cmd.comp_mask >= IB_UVERBS_EXP_CQ_ATTR_RESERVED)
+ return -ENOSYS;
cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
if (!cq)
@@ -3592,80 +4609,141 @@
put_cq_read(cq);
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_exp_query_device(struct ib_uverbs_file *file,
- struct ib_udata *ucore, struct ib_udata *uhw)
+int ib_uverbs_exp_query_device(struct ib_uverbs_file *file,
+ struct ib_udata *ucore, struct ib_udata *uhw)
{
- struct ib_uverbs_exp_query_device_resp resp;
- struct ib_exp_device_attr exp_attr;
+ struct ib_uverbs_exp_query_device_resp *resp;
+ struct ib_uverbs_exp_query_device cmd;
+ struct ib_exp_device_attr *exp_attr;
int ret;
- if (ucore->outlen + uhw->outlen < sizeof(resp))
- return -ENOSPC;
-
- memset(&resp, 0, sizeof(resp));
- memset(&exp_attr, 0, sizeof(exp_attr));
- ret = ib_exp_query_device(file->device->ib_dev, &exp_attr);
+ ret = ucore->ops->copy_from(&cmd, ucore, sizeof(cmd));
if (ret)
return ret;
- ib_uverbs_query_device_assign(&resp.base, &exp_attr.base, file);
+ resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+ exp_attr = kzalloc(sizeof(*exp_attr), GFP_KERNEL);
+ if (!exp_attr || !resp) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = ib_exp_query_device(file->device->ib_dev, exp_attr);
+ if (ret)
+ goto out;
+
+ memset(resp, 0, sizeof(*resp));
+ copy_query_dev_fields(file, &resp->base, &exp_attr->base);
- resp.comp_mask = 0;
- resp.device_cap_flags2 = 0;
+ resp->comp_mask = 0;
+ resp->device_cap_flags2 = 0;
/*
* Handle regular attr fields
*/
- if (exp_attr.base.comp_mask & IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK) {
- resp.timestamp_mask = exp_attr.base.timestamp_mask;
- resp.comp_mask |= IB_EXP_DEVICE_ATTR_WITH_TIMESTAMP_MASK;
+ if (exp_attr->base.comp_mask & IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK) {
+ resp->timestamp_mask = exp_attr->base.timestamp_mask;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_WITH_TIMESTAMP_MASK;
}
- if (exp_attr.base.comp_mask & IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK) {
- resp.hca_core_clock = exp_attr.base.hca_core_clock;
- resp.comp_mask |= IB_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK;
+ if (exp_attr->base.comp_mask & IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK) {
+ resp->hca_core_clock = exp_attr->base.hca_core_clock;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK;
}
/*
* Handle experimental attr fields
*/
- if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_CAP_FLAGS2) {
- resp.device_cap_flags2 = exp_attr.device_cap_flags2;
- resp.comp_mask |= IB_EXP_DEVICE_ATTR_CAP_FLAGS2;
+ if (exp_attr->exp_comp_mask & IB_EXP_DEVICE_ATTR_CAP_FLAGS2 ||
+ exp_attr->base.device_cap_flags & IB_EXP_DEVICE_MASK) {
+ resp->device_cap_flags2 = exp_attr->device_cap_flags2;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_CAP_FLAGS2;
+ resp->device_cap_flags2 |= IB_EXP_DEVICE_MASK & exp_attr->base.device_cap_flags;
+ resp->base.device_cap_flags &= ~IB_EXP_DEVICE_MASK;
}
- if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_DC_REQ_RD) {
- resp.dc_rd_req = exp_attr.dc_rd_req;
- resp.comp_mask |= IB_EXP_DEVICE_ATTR_DC_REQ_RD;
+ if (exp_attr->exp_comp_mask & IB_EXP_DEVICE_ATTR_DC_REQ_RD) {
+ resp->dc_rd_req = exp_attr->dc_rd_req;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_DC_REQ_RD;
}
- if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_DC_RES_RD) {
- resp.dc_rd_res = exp_attr.dc_rd_res;
- resp.comp_mask |= IB_EXP_DEVICE_ATTR_DC_RES_RD;
+ if (exp_attr->exp_comp_mask & IB_EXP_DEVICE_ATTR_DC_RES_RD) {
+ resp->dc_rd_res = exp_attr->dc_rd_res;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_DC_RES_RD;
}
- if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ) {
- resp.inline_recv_sz = exp_attr.inline_recv_sz;
- resp.comp_mask |= IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ;
+ if (exp_attr->exp_comp_mask & IB_EXP_DEVICE_ATTR_MAX_DCT) {
+ resp->max_dct = exp_attr->max_dct;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_MAX_DCT;
}
- if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_RSS_TBL_SZ) {
- resp.max_rss_tbl_sz = exp_attr.max_rss_tbl_sz;
- resp.comp_mask |= IB_EXP_DEVICE_ATTR_RSS_TBL_SZ;
+ if (exp_attr->exp_comp_mask & IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ) {
+ resp->inline_recv_sz = exp_attr->inline_recv_sz;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ;
}
- if (copy_to_user(ucore->outbuf, &resp, sizeof(resp)))
- return -EFAULT;
+ if (exp_attr->exp_comp_mask & IB_EXP_DEVICE_ATTR_RSS_TBL_SZ) {
+ resp->max_rss_tbl_sz = exp_attr->max_rss_tbl_sz;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_RSS_TBL_SZ;
+ }
+
+ if (exp_attr->exp_comp_mask & IB_EXP_DEVICE_ATTR_EXT_ATOMIC_ARGS) {
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_EXT_ATOMIC_ARGS;
+ resp->atomic_arg_sizes = exp_attr->atomic_arg_sizes;
+ resp->max_fa_bit_boudary = exp_attr->max_fa_bit_boudary;
+ resp->log_max_atomic_inline_arg = exp_attr->log_max_atomic_inline_arg;
+ }
+
+ if (exp_attr->exp_comp_mask & IB_EXP_DEVICE_ATTR_UMR) {
+ resp->umr_caps.max_reg_descriptors = exp_attr->umr_caps.max_reg_descriptors;
+ resp->umr_caps.max_send_wqe_inline_klms = exp_attr->umr_caps.max_send_wqe_inline_klms;
+ resp->umr_caps.max_umr_recursion_depth = exp_attr->umr_caps.max_umr_recursion_depth;
+ resp->umr_caps.max_umr_stride_dimenson = exp_attr->umr_caps.max_umr_stride_dimenson;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_UMR;
+ }
+
+ if (exp_attr->exp_comp_mask & IB_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN) {
+ resp->max_ctx_res_domain = exp_attr->max_ctx_res_domain;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN;
+ }
+
+ if (exp_attr->exp_comp_mask & IB_EXP_DEVICE_ATTR_MAX_WQ_TYPE_RQ) {
+ resp->max_wq_type_rq = exp_attr->max_wq_type_rq;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_MAX_WQ_TYPE_RQ;
+ }
+
+ if (exp_attr->exp_comp_mask & IB_EXP_DEVICE_ATTR_MAX_DEVICE_CTX) {
+ resp->max_device_ctx = exp_attr->max_device_ctx;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_MAX_DEVICE_CTX;
+ }
- return ucore->inlen + uhw->inlen;
+ if (exp_attr->exp_comp_mask & IB_EXP_DEVICE_ATTR_RX_HASH) {
+ resp->rx_hash.max_rwq_indirection_tables = exp_attr->rx_hash_caps.max_rwq_indirection_tables;
+ resp->rx_hash.max_rwq_indirection_table_size = exp_attr->rx_hash_caps.max_rwq_indirection_table_size;
+ resp->rx_hash.supported_packet_fields = exp_attr->rx_hash_caps.supported_packet_fields;
+ resp->rx_hash.supported_qps = exp_attr->rx_hash_caps.supported_qps;
+ resp->rx_hash.supported_hash_functions = exp_attr->rx_hash_caps.supported_hash_functions;
+ resp->comp_mask |= IB_EXP_DEVICE_ATTR_RX_HASH;
+ }
+
+ if (copy_to_user(ucore->outbuf, resp, min_t(size_t, sizeof(*resp),
+ ucore->outlen))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+out:
+ kfree(exp_attr);
+ kfree(resp);
+
+ return ret;
}
-ssize_t ib_uverbs_exp_create_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore, struct ib_udata *uhw)
+int ib_uverbs_exp_create_qp(struct ib_uverbs_file *file,
+ struct ib_udata *ucore, struct ib_udata *uhw)
{
struct ib_uqp_object *obj;
struct ib_device *device;
@@ -3675,53 +4753,74 @@
struct ib_cq *scq = NULL, *rcq = NULL;
struct ib_srq *srq = NULL;
struct ib_qp *qp;
- struct ib_exp_qp_init_attr attr;
- int ret;
- struct ib_uverbs_exp_create_qp cmd_exp;
+ struct ib_exp_qp_init_attr *attr;
+ struct ib_uverbs_exp_create_qp *cmd_exp;
struct ib_uverbs_exp_create_qp_resp resp_exp;
struct ib_qp *parentqp = NULL;
+ int ret;
+ struct ib_rx_hash_conf rx_hash_conf;
+ struct ib_rwq_ind_table *ind_tbl = NULL;
+ int rx_qp = 0;
+ int i;
- memset(&cmd_exp, 0, sizeof(cmd_exp));
-
- ret = ucore->ops->copy_from(&cmd_exp, ucore, sizeof(cmd_exp));
+ cmd_exp = kzalloc(sizeof(*cmd_exp), GFP_KERNEL);
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ if (!cmd_exp || !attr) {
+ ret = -ENOMEM;
+ goto err_cmd_attr;
+ }
+ ret = ucore->ops->copy_from(cmd_exp, ucore, sizeof(*cmd_exp));
if (ret)
- return ret;
+ goto err_cmd_attr;
if (!disable_raw_qp_enforcement &&
- cmd_exp.qp_type == IB_QPT_RAW_PACKET && priv_check(curthread,
- PRIV_NET_RAW))
- return -EPERM;
+ cmd_exp->qp_type == IB_QPT_RAW_PACKET && priv_check(curthread,
+ PRIV_NET_RAW)) {
+ ret = -EPERM;
+ goto err_cmd_attr;
+ }
+
+ for (i = 0; i < sizeof(cmd_exp->reserved_2); i++) {
+ if (cmd_exp->reserved_2[i] != 0) {
+ ret = -EINVAL;
+ goto err_cmd_attr;
+ }
+ }
obj = kzalloc(sizeof(*obj), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ if (!obj) {
+ ret = -ENOMEM;
+ goto err_cmd_attr;
+ }
- init_uobj(&obj->uevent.uobject, cmd_exp.user_handle, file->ucontext,
+ init_uobj(&obj->uevent.uobject, cmd_exp->user_handle, file->ucontext,
&qp_lock_class);
down_write(&obj->uevent.uobject.mutex);
+ rx_qp = cmd_exp->rx_hash_conf.rx_hash_function ? 1 : 0;
- if (cmd_exp.qp_type == IB_QPT_XRC_TGT) {
- xrcd = idr_read_xrcd(cmd_exp.pd_handle, file->ucontext, &xrcd_uobj);
+ if (cmd_exp->qp_type == IB_QPT_XRC_TGT) {
+ xrcd = idr_read_xrcd(cmd_exp->pd_handle, file->ucontext, &xrcd_uobj);
if (!xrcd) {
ret = -EINVAL;
goto err_put;
}
device = xrcd->device;
} else {
- if (cmd_exp.qp_type == IB_QPT_XRC_INI) {
- cmd_exp.max_recv_wr = 0;
- cmd_exp.max_recv_sge = 0;
+ if (cmd_exp->qp_type == IB_QPT_XRC_INI ||
+ cmd_exp->qp_type == IB_EXP_QPT_DC_INI) {
+ cmd_exp->max_recv_wr = 0;
+ cmd_exp->max_recv_sge = 0;
} else {
- if (cmd_exp.is_srq) {
- srq = idr_read_srq(cmd_exp.srq_handle, file->ucontext);
+ if (cmd_exp->is_srq) {
+ srq = idr_read_srq(cmd_exp->srq_handle, file->ucontext);
if (!srq || srq->srq_type != IB_SRQT_BASIC) {
ret = -EINVAL;
goto err_put;
}
}
- if (cmd_exp.recv_cq_handle != cmd_exp.send_cq_handle) {
- rcq = idr_read_cq(cmd_exp.recv_cq_handle, file->ucontext, 0);
+ if (cmd_exp->recv_cq_handle != cmd_exp->send_cq_handle) {
+ rcq = idr_read_cq(cmd_exp->recv_cq_handle, file->ucontext, 0);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -3729,10 +4828,11 @@
}
}
- scq = idr_read_cq(cmd_exp.send_cq_handle, file->ucontext, !!rcq);
+ if (!rx_qp)
+ scq = idr_read_cq(cmd_exp->send_cq_handle, file->ucontext, !!rcq);
rcq = rcq ?: scq;
- pd = idr_read_pd(cmd_exp.pd_handle, file->ucontext);
- if (!pd || !scq) {
+ pd = idr_read_pd(cmd_exp->pd_handle, file->ucontext);
+ if (!pd || (!scq && !rx_qp)) {
ret = -EINVAL;
goto err_put;
}
@@ -3740,42 +4840,44 @@
device = pd->device;
}
- memset(&attr, 0, sizeof(attr));
- attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = file;
- attr.send_cq = scq;
- attr.recv_cq = rcq;
- attr.srq = srq;
- attr.xrcd = xrcd;
- attr.sq_sig_type = cmd_exp.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
- attr.qp_type = cmd_exp.qp_type;
- attr.create_flags = 0;
-
- attr.cap.max_send_wr = cmd_exp.max_send_wr;
- attr.cap.max_recv_wr = cmd_exp.max_recv_wr;
- attr.cap.max_send_sge = cmd_exp.max_send_sge;
- attr.cap.max_recv_sge = cmd_exp.max_recv_sge;
- attr.cap.max_inline_data = cmd_exp.max_inline_data;
-
- if (cmd_exp.comp_mask & IB_UVERBS_EXP_CREATE_QP_CAP_FLAGS)
- attr.create_flags |= cmd_exp.qp_cap_flags &
- (IB_QP_CREATE_CROSS_CHANNEL |
- IB_QP_CREATE_MANAGED_SEND |
- IB_QP_CREATE_MANAGED_RECV);
-
- if (cmd_exp.comp_mask & IB_UVERBS_EXP_CREATE_QP_QPG) {
+ attr->event_handler = ib_uverbs_qp_event_handler;
+ attr->qp_context = file;
+ attr->send_cq = scq;
+ attr->recv_cq = rcq;
+ attr->srq = srq;
+ attr->xrcd = xrcd;
+ attr->sq_sig_type = cmd_exp->sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+ attr->qp_type = cmd_exp->qp_type;
+ attr->create_flags = 0;
+
+ attr->cap.max_send_wr = cmd_exp->max_send_wr;
+ attr->cap.max_recv_wr = cmd_exp->max_recv_wr;
+ attr->cap.max_send_sge = cmd_exp->max_send_sge;
+ attr->cap.max_recv_sge = cmd_exp->max_recv_sge;
+ attr->cap.max_inline_data = cmd_exp->max_inline_data;
+ attr->rx_hash_conf = NULL;
+
+ if (cmd_exp->comp_mask & IB_UVERBS_EXP_CREATE_QP_CAP_FLAGS) {
+ if (cmd_exp->qp_cap_flags & ~IBV_UVERBS_EXP_CREATE_QP_FLAGS) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ attr->create_flags |= cmd_exp->qp_cap_flags;
+ }
+
+ if (cmd_exp->comp_mask & IB_UVERBS_EXP_CREATE_QP_QPG) {
struct ib_uverbs_qpg *qpg;
- if (cmd_exp.qp_type != IB_QPT_RAW_PACKET &&
- cmd_exp.qp_type != IB_QPT_UD) {
+ if (cmd_exp->qp_type != IB_QPT_RAW_PACKET &&
+ cmd_exp->qp_type != IB_QPT_UD) {
ret = -EINVAL;
goto err_put;
}
- qpg = &cmd_exp.qpg;
+ qpg = &cmd_exp->qpg;
switch (qpg->qpg_type) {
case IB_QPG_PARENT:
- attr.parent_attrib.rss_child_count =
+ attr->parent_attrib.rss_child_count =
qpg->parent_attrib.rss_child_count;
- attr.parent_attrib.tss_child_count =
+ attr->parent_attrib.tss_child_count =
qpg->parent_attrib.tss_child_count;
break;
case IB_QPG_CHILD_RX:
@@ -3786,49 +4888,70 @@
ret = -EINVAL;
goto err_put;
}
- attr.qpg_parent = parentqp;
+ attr->qpg_parent = parentqp;
break;
default:
ret = -EINVAL;
goto err_put;
}
- attr.qpg_type = qpg->qpg_type;
+ attr->qpg_type = qpg->qpg_type;
}
- if (cmd_exp.comp_mask & IB_UVERBS_EXP_CREATE_QP_INL_RECV)
- attr.max_inl_recv = cmd_exp.max_inl_recv;
+ if (cmd_exp->comp_mask & IB_UVERBS_EXP_CREATE_QP_INL_RECV)
+ attr->max_inl_recv = cmd_exp->max_inl_recv;
+
+ /* No comp mask bit is needed; the value of rx_hash_function is used */
+ if (cmd_exp->rx_hash_conf.rx_hash_function) {
+ ind_tbl = idr_read_rwq_indirection_table(cmd_exp->rx_hash_conf.rwq_ind_tbl_handle,
+ file->ucontext);
+ if (!ind_tbl) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ rx_hash_conf.rwq_ind_tbl = ind_tbl;
+ rx_hash_conf.rx_hash_fields_mask = cmd_exp->rx_hash_conf.rx_hash_fields_mask;
+ rx_hash_conf.rx_hash_function = cmd_exp->rx_hash_conf.rx_hash_function;
+ rx_hash_conf.rx_hash_key = cmd_exp->rx_hash_conf.rx_hash_key;
+ rx_hash_conf.rx_key_len = cmd_exp->rx_hash_conf.rx_key_len;
+ attr->rx_hash_conf = &rx_hash_conf;
+ }
+ attr->port_num = cmd_exp->port_num;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
- if (cmd_exp.qp_type == IB_QPT_XRC_TGT)
- qp = ib_create_qp(pd, (struct ib_qp_init_attr *)&attr);
+ if (cmd_exp->qp_type == IB_QPT_XRC_TGT)
+ qp = ib_create_qp(pd, (struct ib_qp_init_attr *)attr);
else
- qp = device->exp_create_qp(pd, &attr, uhw);
+ qp = device->exp_create_qp(pd, attr, uhw);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_put;
}
- if (cmd_exp.qp_type != IB_QPT_XRC_TGT) {
+ if (cmd_exp->qp_type != IB_QPT_XRC_TGT) {
qp->real_qp = qp;
qp->device = device;
qp->pd = pd;
- qp->send_cq = attr.send_cq;
- qp->recv_cq = attr.recv_cq;
- qp->srq = attr.srq;
- qp->event_handler = attr.event_handler;
- qp->qp_context = attr.qp_context;
- qp->qp_type = attr.qp_type;
+ qp->send_cq = attr->send_cq;
+ qp->recv_cq = attr->recv_cq;
+ qp->srq = attr->srq;
+ qp->rwq_ind_tbl = ind_tbl;
+ qp->event_handler = attr->event_handler;
+ qp->qp_context = attr->qp_context;
+ qp->qp_type = attr->qp_type;
atomic_set(&qp->usecnt, 0);
atomic_inc(&pd->usecnt);
- atomic_inc(&attr.send_cq->usecnt);
- if (attr.recv_cq)
- atomic_inc(&attr.recv_cq->usecnt);
- if (attr.srq)
- atomic_inc(&attr.srq->usecnt);
+ if (!rx_qp)
+ atomic_inc(&attr->send_cq->usecnt);
+ if (attr->recv_cq)
+ atomic_inc(&attr->recv_cq->usecnt);
+ if (attr->srq)
+ atomic_inc(&attr->srq->usecnt);
+ if (ind_tbl)
+ atomic_inc(&ind_tbl->usecnt);
}
qp->uobject = &obj->uevent.uobject;
@@ -3840,15 +4963,15 @@
memset(&resp_exp, 0, sizeof(resp_exp));
resp_exp.qpn = qp->qp_num;
resp_exp.qp_handle = obj->uevent.uobject.id;
- resp_exp.max_recv_sge = attr.cap.max_recv_sge;
- resp_exp.max_send_sge = attr.cap.max_send_sge;
- resp_exp.max_recv_wr = attr.cap.max_recv_wr;
- resp_exp.max_send_wr = attr.cap.max_send_wr;
- resp_exp.max_inline_data = attr.cap.max_inline_data;
+ resp_exp.max_recv_sge = attr->cap.max_recv_sge;
+ resp_exp.max_send_sge = attr->cap.max_send_sge;
+ resp_exp.max_recv_wr = attr->cap.max_recv_wr;
+ resp_exp.max_send_wr = attr->cap.max_send_wr;
+ resp_exp.max_inline_data = attr->cap.max_inline_data;
- if (cmd_exp.comp_mask & IB_UVERBS_EXP_CREATE_QP_INL_RECV) {
+ if (cmd_exp->comp_mask & IB_UVERBS_EXP_CREATE_QP_INL_RECV) {
resp_exp.comp_mask |= IB_UVERBS_EXP_CREATE_QP_RESP_INL_RECV;
- resp_exp.max_inl_recv = attr.max_inl_recv;
+ resp_exp.max_inl_recv = attr->max_inl_recv;
}
ret = ucore->ops->copy_to(ucore, &resp_exp, sizeof(resp_exp));
@@ -3871,6 +4994,8 @@
put_srq_read(srq);
if (parentqp)
put_qp_read(parentqp);
+ if (ind_tbl)
+ put_rwq_indirection_table_read(ind_tbl);
mutex_lock(&file->mutex);
list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
@@ -3879,8 +5004,10 @@
obj->uevent.uobject.live = 1;
up_write(&obj->uevent.uobject.mutex);
+ kfree(attr);
+ kfree(cmd_exp);
- return ucore->inlen + uhw->inlen;
+ return 0;
err_copy:
idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
@@ -3901,8 +5028,14 @@
put_srq_read(srq);
if (parentqp)
put_qp_read(parentqp);
+ if (ind_tbl)
+ put_rwq_indirection_table_read(ind_tbl);
put_uobj_write(&obj->uevent.uobject);
+
+err_cmd_attr:
+ kfree(attr);
+ kfree(cmd_exp);
return ret;
}
@@ -3911,4 +5044,128 @@
{
return device->exp_query_device(device, device_attr);
}
-EXPORT_SYMBOL(ib_exp_query_device);
+
+int ib_uverbs_exp_create_mr(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_exp_create_mr cmd_exp;
+ struct ib_uverbs_exp_create_mr_resp resp_exp;
+ struct ib_pd *pd = NULL;
+ struct ib_mr *mr = NULL;
+ struct ib_uobject *uobj = NULL;
+ struct ib_mr_init_attr attr;
+ int ret;
+
+ if (ucore->outlen + uhw->outlen < sizeof(resp_exp))
+ return -ENOSPC;
+
+ ret = ucore->ops->copy_from(&cmd_exp, ucore, sizeof(cmd_exp));
+ if (ret)
+ return ret;
+
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
+ down_write(&uobj->mutex);
+
+ pd = idr_read_pd(cmd_exp.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto err_free;
+ }
+ /* We first get a new "obj id" to be passed later to reg mr for
+ * further use as mr_id.
+ */
+ ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
+ if (ret)
+ goto err_put;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.flags = cmd_exp.create_flags;
+ attr.exp_access_flags = cmd_exp.exp_access_flags;
+ attr.max_reg_descriptors = cmd_exp.max_reg_descriptors;
+ mr = ib_create_mr(pd, &attr);
+ if (IS_ERR(mr)) {
+ ret = PTR_ERR(mr);
+ goto err_remove_uobj;
+ }
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = uobj;
+ atomic_set(&mr->usecnt, 0);
+
+ uobj->object = mr;
+
+ memset(&resp_exp, 0, sizeof(resp_exp));
+ resp_exp.lkey = mr->lkey;
+ resp_exp.rkey = mr->rkey;
+ resp_exp.handle = uobj->id;
+
+ ret = ucore->ops->copy_to(ucore, &resp_exp, sizeof(resp_exp));
+ if (ret)
+ goto err_copy;
+
+ put_pd_read(pd);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->mr_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+
+ return 0;
+
+err_copy:
+ ib_dereg_mr(mr);
+
+err_remove_uobj:
+ idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
+
+err_put:
+ put_pd_read(pd);
+
+err_free:
+ put_uobj_write(uobj);
+ return ret;
+}
+
+int ib_uverbs_exp_query_mkey(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_exp_query_mkey cmd_exp;
+ struct ib_uverbs_exp_query_mkey_resp resp_exp;
+ struct ib_mr *mr;
+ struct ib_mkey_attr mkey_attr;
+ int ret;
+
+ memset(&cmd_exp, 0, sizeof(cmd_exp));
+ ret = ucore->ops->copy_from(&cmd_exp, ucore, sizeof(cmd_exp));
+ if (ret)
+ return ret;
+
+ mr = idr_read_mr(cmd_exp.handle, file->ucontext);
+ if (!mr)
+ return -EINVAL;
+
+ ret = ib_query_mkey(mr, 0, &mkey_attr);
+ if (ret)
+ return ret;
+
+ put_mr_read(mr);
+
+ memset(&resp_exp, 0, sizeof(resp_exp));
+ resp_exp.max_reg_descriptors = mkey_attr.max_reg_descriptors;
+
+ ret = ucore->ops->copy_to(ucore, &resp_exp, sizeof(resp_exp));
+ if (ret)
+ return ret;
+
+ return 0;
+}
Index: sys/ofed/drivers/infiniband/core/uverbs_main.c
===================================================================
--- sys/ofed/drivers/infiniband/core/uverbs_main.c
+++ sys/ofed/drivers/infiniband/core/uverbs_main.c
@@ -51,9 +51,7 @@
#include "uverbs.h"
-MODULE_AUTHOR("Roland Dreier");
-MODULE_DESCRIPTION("InfiniBand userspace verbs access");
-MODULE_LICENSE("Dual BSD/GPL");
+/* InfiniBand userspace verbs access */
enum {
IB_UVERBS_MAJOR = 231,
@@ -81,10 +79,11 @@
#define INIT_UDATA_EX(udata, ibuf, obuf, ilen, olen) \
do { \
(udata)->ops = &uverbs_copy_ex; \
- (udata)->inbuf = (void __user *)(unsigned long)(ibuf); \
- (udata)->outbuf = (void __user *)(unsigned long)(obuf); \
+ (udata)->inbuf = (void __user *)(ibuf); \
+ (udata)->outbuf = (void __user *)(obuf); \
(udata)->inlen = (ilen); \
(udata)->outlen = (olen); \
+ (udata)->src = IB_UDATA_EX_CMD; \
} while (0)
@@ -101,6 +100,8 @@
DEFINE_IDR(ib_uverbs_xrcd_idr);
DEFINE_IDR(ib_uverbs_rule_idr);
DEFINE_IDR(ib_uverbs_dct_idr);
+DEFINE_IDR(ib_uverbs_wq_idr);
+DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -108,36 +109,37 @@
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len) = {
- [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
- [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
- [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
- [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
- [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
- [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
- [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
+ [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
+ [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
+ [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
+ [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
+ [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
+ [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
+ [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
+ [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
[IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
[IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
- [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
- [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
- [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
- [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
- [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
- [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
- [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
- [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
- [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
- [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
- [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
- [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
- [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
- [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
- [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
- [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
- [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
- [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
- [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
- [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
+ [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
+ [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
+ [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
+ [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
+ [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
+ [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
+ [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
+ [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
+ [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
+ [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
+ [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
+ [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
+ [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
+ [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
+ [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
+ [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
+ [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
+ [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
+ [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
+ [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
[IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
[IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
[IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
@@ -150,10 +152,11 @@
[IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
[IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
};
+typedef int (*uverbs_ex_cmd)(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw);
-static ssize_t (*uverbs_exp_cmd_table[])(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw) = {
+static uverbs_ex_cmd uverbs_exp_cmd_table[] = {
[IB_USER_VERBS_EXP_CMD_CREATE_QP] = ib_uverbs_exp_create_qp,
[IB_USER_VERBS_EXP_CMD_MODIFY_CQ] = ib_uverbs_exp_modify_cq,
[IB_USER_VERBS_EXP_CMD_MODIFY_QP] = ib_uverbs_exp_modify_qp,
@@ -162,6 +165,20 @@
[IB_USER_VERBS_EXP_CMD_CREATE_DCT] = ib_uverbs_exp_create_dct,
[IB_USER_VERBS_EXP_CMD_DESTROY_DCT] = ib_uverbs_exp_destroy_dct,
[IB_USER_VERBS_EXP_CMD_QUERY_DCT] = ib_uverbs_exp_query_dct,
+ [IB_USER_VERBS_EXP_CMD_ARM_DCT] = ib_uverbs_exp_arm_dct,
+ [IB_USER_VERBS_EXP_CMD_CREATE_MR] = ib_uverbs_exp_create_mr,
+ [IB_USER_VERBS_EXP_CMD_QUERY_MKEY] = ib_uverbs_exp_query_mkey,
+ [IB_USER_VERBS_EXP_CMD_REG_MR_EX] = ib_uverbs_exp_reg_mr_ex,
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ [IB_USER_VERBS_EXP_CMD_PREFETCH_MR] = ib_uverbs_exp_prefetch_mr,
+#endif
+ [IB_USER_VERBS_EXP_CMD_REREG_MR] = ib_uverbs_exp_rereg_mr,
+ [IB_USER_VERBS_EXP_CMD_CREATE_WQ] = ib_uverbs_exp_create_wq,
+ [IB_USER_VERBS_EXP_CMD_MODIFY_WQ] = ib_uverbs_exp_modify_wq,
+ [IB_USER_VERBS_EXP_CMD_DESTROY_WQ] = ib_uverbs_exp_destroy_wq,
+ [IB_USER_VERBS_EXP_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_exp_create_rwq_ind_table,
+ [IB_USER_VERBS_EXP_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_exp_destroy_rwq_ind_table,
+ [IB_USER_VERBS_EXP_CMD_CREATE_FLOW] = ib_uverbs_exp_create_flow,
};
static void ib_uverbs_add_one(struct ib_device *device);
@@ -172,7 +189,12 @@
struct ib_uverbs_device *dev =
container_of(ref, struct ib_uverbs_device, ref);
- complete(&dev->comp);
+ if (dev->disassociated) {
+ cleanup_srcu_struct(&dev->disassociate_srcu);
+ kfree(dev);
+ } else {
+ complete(&dev->comp);
+ }
}
static void ib_uverbs_release_event_file(struct kref *ref)
@@ -257,13 +279,10 @@
struct ib_mw *mw = uobj->object;
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
- err = ib_dealloc_mw(mw);
- if (err) {
- pr_info("user_verbs: couldn't deallocate MW during cleanup.\n");
- pr_info("user_verbs: the system may have become unstable.\n");
- }
+ ib_dealloc_mw(mw);
kfree(uobj);
}
+
list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
struct ib_flow *flow_id = uobj->object;
@@ -291,7 +310,7 @@
list_for_each_entry_safe(uobj, tmp, &context->dct_list, list) {
struct ib_dct *dct = uobj->object;
struct ib_udct_object *udct =
- container_of(uobj, struct ib_udct_object, uobject);
+ container_of(uobj, struct ib_udct_object, uevent.uobject);
idr_remove_uobj(&ib_uverbs_dct_idr, uobj);
@@ -302,6 +321,29 @@
kfree(udct);
}
+ list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
+ struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
+ struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
+
+ idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
+ err = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+ if (err)
+ pr_info("destroying uverbs rwq_ind_tbl failed: err %d\n", err);
+
+ kfree(ind_tbl);
+ kfree(uobj);
+ }
+
+ list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
+ struct ib_wq *wq = uobj->object;
+
+ idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
+ err = ib_destroy_wq(wq);
+ if (err)
+ pr_info("destroying uverbs wq failed: err %d\n", err);
+ kfree(uobj);
+ }
+
list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
struct ib_srq *srq = uobj->object;
struct ib_uevent_object *uevent =
@@ -325,7 +367,6 @@
err = ib_destroy_cq(cq);
if (err)
pr_info("destroying uverbs cq failed: err %d\n", err);
-
ib_uverbs_release_ucq(file, ev_file, ucq);
kfree(ucq);
}
@@ -370,7 +411,9 @@
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
- module_put(file->device->ib_dev->owner);
+ if (!(file->device->flags & UVERBS_FLAG_DISASSOCIATE))
+ module_put(file->device->ib_dev->owner);
+
kref_put(&file->device->ref, ib_uverbs_release_dev);
kfree(file);
@@ -393,9 +436,15 @@
return -EAGAIN;
if (wait_event_interruptible(file->poll_wait,
- !list_empty(&file->event_list)))
+ (!list_empty(&file->event_list) ||
+ file->uverbs_file->device->disassociated)))
+ /* will reach here in case a signal has occurred */
return -ERESTARTSYS;
+ /* We reach here once the list is not empty or once the device was disassociated */
+ if (list_empty(&file->event_list) && file->uverbs_file->device->disassociated)
+ return -EIO;
+
spin_lock_irq(&file->lock);
}
@@ -469,18 +518,23 @@
}
spin_unlock_irq(&file->lock);
- if (file->is_async) {
- ib_unregister_event_handler(&file->uverbs_file->event_handler);
- kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+ mutex_lock(&file->uverbs_file->device->disassociate_mutex);
+ if (!file->uverbs_file->device->disassociated) {
+ list_del(&file->list);
+ if (file->is_async)
+ ib_unregister_event_handler(&file->uverbs_file->event_handler);
}
- kref_put(&file->ref, ib_uverbs_release_event_file);
+ mutex_unlock(&file->uverbs_file->device->disassociate_mutex);
+
+ kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+ kref_put(&file->ref, ib_uverbs_release_event_file);
return 0;
}
static const struct file_operations uverbs_event_fops = {
.owner = THIS_MODULE,
- .read = ib_uverbs_event_read,
+ .read = ib_uverbs_event_read,
.poll = ib_uverbs_event_poll,
.release = ib_uverbs_event_close,
.fasync = ib_uverbs_event_fasync,
@@ -527,7 +581,7 @@
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
__u64 element, __u64 event,
struct list_head *obj_list,
- u32 *counter)
+ u32 *counter, u32 rsc_type)
{
struct ib_uverbs_event *entry;
unsigned long flags;
@@ -547,6 +601,7 @@
entry->desc.async.element = element;
entry->desc.async.event_type = event;
entry->counter = counter;
+ entry->desc.async.rsc_type = rsc_type;
list_add_tail(&entry->list, &file->async_file->event_list);
if (obj_list)
@@ -566,19 +621,23 @@
ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
event->event, &uobj->async_list,
- &uobj->async_events_reported);
+ &uobj->async_events_reported, IB_EVENT_RSC_CQ);
}
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
struct ib_uevent_object *uobj;
+ /* for XRC target QPs, check that the qp is live */
+ if (!event->element.qp->uobject || !event->element.qp->uobject->live)
+ return;
+
uobj = container_of(event->element.qp->uobject,
struct ib_uevent_object, uobject);
ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
event->event, &uobj->event_list,
- &uobj->events_reported);
+ &uobj->events_reported, IB_EVENT_RSC_QP);
}
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
@@ -590,7 +649,7 @@
ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
event->event, &uobj->event_list,
- &uobj->events_reported);
+ &uobj->events_reported, IB_EVENT_RSC_SRQ);
}
void ib_uverbs_event_handler(struct ib_event_handler *handler,
@@ -600,7 +659,7 @@
container_of(handler, struct ib_uverbs_file, event_handler);
ib_uverbs_async_handler(file, event->element.port_num, event->event,
- NULL, NULL);
+ NULL, NULL, IB_EVENT_RSC_DEVICE);
}
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
@@ -621,19 +680,33 @@
ev_file->is_async = is_async;
/*
- * fops_get() can't fail here, because we're coming from a
- * system call on a uverbs file, which will already have a
- * module reference.
- */
+ * fops_get() can't fail here, because we're coming from a
+ * system call on a uverbs file, which will already have a
+ * module reference.
+ */
filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops));
if (IS_ERR(filp)) {
kfree(ev_file);
+ return filp;
} else {
- filp->private_data = ev_file;
+ filp->private_data = ev_file;
+ }
+
+ mutex_lock(&uverbs_file->device->disassociate_mutex);
+ if (!uverbs_file->device->disassociated) {
+ list_add_tail(&ev_file->list,
+ &uverbs_file->device->uverbs_events_file_list);
+ mutex_unlock(&uverbs_file->device->disassociate_mutex);
+
+ return filp;
}
- return filp;
+ mutex_unlock(&uverbs_file->device->disassociate_mutex);
+
+ fput(filp);
+ kfree(ev_file);
+ return ERR_PTR(-EIO);
}
/*
@@ -665,302 +738,164 @@
return ev_file;
}
-static const char *verbs_cmd_str(__u32 cmd)
-{
- switch (cmd) {
- case IB_USER_VERBS_CMD_GET_CONTEXT:
- return "GET_CONTEXT";
- case IB_USER_VERBS_CMD_QUERY_DEVICE:
- return "QUERY_DEVICE";
- case IB_USER_VERBS_CMD_QUERY_PORT:
- return "QUERY_PORT";
- case IB_USER_VERBS_CMD_ALLOC_PD:
- return "ALLOC_PD";
- case IB_USER_VERBS_CMD_DEALLOC_PD:
- return "DEALLOC_PD";
- case IB_USER_VERBS_CMD_REG_MR:
- return "REG_MR";
- case IB_USER_VERBS_CMD_DEREG_MR:
- return "DEREG_MR";
- case IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL:
- return "CREATE_COMP_CHANNEL";
- case IB_USER_VERBS_CMD_CREATE_CQ:
- return "CREATE_CQ";
- case IB_USER_VERBS_CMD_RESIZE_CQ:
- return "RESIZE_CQ";
- case IB_USER_VERBS_CMD_POLL_CQ:
- return "POLL_CQ";
- case IB_USER_VERBS_CMD_REQ_NOTIFY_CQ:
- return "REQ_NOTIFY_CQ";
- case IB_USER_VERBS_CMD_DESTROY_CQ:
- return "DESTROY_CQ";
- case IB_USER_VERBS_CMD_CREATE_QP:
- return "CREATE_QP";
- case IB_USER_VERBS_CMD_QUERY_QP:
- return "QUERY_QP";
- case IB_USER_VERBS_CMD_MODIFY_QP:
- return "MODIFY_QP";
- case IB_USER_VERBS_CMD_DESTROY_QP:
- return "DESTROY_QP";
- case IB_USER_VERBS_CMD_POST_SEND:
- return "POST_SEND";
- case IB_USER_VERBS_CMD_POST_RECV:
- return "POST_RECV";
- case IB_USER_VERBS_CMD_POST_SRQ_RECV:
- return "POST_SRQ_RECV";
- case IB_USER_VERBS_CMD_CREATE_AH:
- return "CREATE_AH";
- case IB_USER_VERBS_CMD_DESTROY_AH:
- return "DESTROY_AH";
- case IB_USER_VERBS_CMD_ATTACH_MCAST:
- return "ATTACH_MCAST";
- case IB_USER_VERBS_CMD_DETACH_MCAST:
- return "DETACH_MCAST";
- case IB_USER_VERBS_CMD_CREATE_SRQ:
- return "CREATE_SRQ";
- case IB_USER_VERBS_CMD_MODIFY_SRQ:
- return "MODIFY_SRQ";
- case IB_USER_VERBS_CMD_QUERY_SRQ:
- return "QUERY_SRQ";
- case IB_USER_VERBS_CMD_DESTROY_SRQ:
- return "DESTROY_SRQ";
- case IB_USER_VERBS_CMD_OPEN_XRCD:
- return "OPEN_XRCD";
- case IB_USER_VERBS_CMD_CLOSE_XRCD:
- return "CLOSE_XRCD";
- case IB_USER_VERBS_CMD_CREATE_XSRQ:
- return "CREATE_XSRQ";
- case IB_USER_VERBS_CMD_OPEN_QP:
- return "OPEN_QP";
- }
-
- return "Unknown command";
-}
-
enum {
COMMAND_INFO_MASK = 0x1000,
};
-static ssize_t ib_uverbs_exp_handle_cmd(struct ib_uverbs_file *file,
- const char __user *buf,
- struct ib_device *dev,
- struct ib_uverbs_cmd_hdr *hdr,
- size_t count,
- int legacy_ex_cmd)
-{
- struct ib_udata ucore;
- struct ib_udata uhw;
- struct ib_uverbs_ex_cmd_hdr ex_hdr;
- __u32 command = hdr->command - IB_USER_VERBS_EXP_CMD_FIRST;
-
- if (hdr->command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
- IB_USER_VERBS_CMD_COMMAND_MASK))
- return -EINVAL;
-
- if (command >= ARRAY_SIZE(uverbs_exp_cmd_table) ||
- !uverbs_exp_cmd_table[command])
- return -EINVAL;
-
- if (!file->ucontext)
- return -EINVAL;
-
- if (!(dev->uverbs_exp_cmd_mask & (1ull << command)))
- return -ENOSYS;
-
- if (legacy_ex_cmd) {
- struct ib_uverbs_ex_cmd_hdr_legacy hxl;
- struct ib_uverbs_ex_cmd_resp1_legacy resp1;
- __u64 response;
- ssize_t ret;
-
- if (count < sizeof(hxl))
- return -EINVAL;
-
- if (copy_from_user(&hxl, buf, sizeof(hxl)))
- return -EFAULT;
-
- if (((hxl.in_words + hxl.provider_in_words) * 4) != count)
- return -EINVAL;
-
- count -= sizeof(hxl);
- buf += sizeof(hxl);
- if (hxl.out_words || hxl.provider_out_words) {
- if (count < sizeof(resp1))
- return -EINVAL;
- if (copy_from_user(&resp1, buf, sizeof(resp1)))
- return -EFAULT;
- response = resp1.response;
- if (!response)
- return -EINVAL;
-
- /*
- * Change user buffer to comply with new extension format.
- */
- if (sizeof(resp1.comp_mask) != sizeof(resp1.response))
- return -EFAULT;
- buf += sizeof(resp1.comp_mask);
- if (copy_to_user(__DECONST(void __user *, buf), &resp1.comp_mask,
- sizeof(resp1.response)))
- return -EFAULT;
-
- } else {
- response = 0;
- }
-
- INIT_UDATA_EX(&ucore,
- (hxl.in_words) ? buf : 0,
- response,
- hxl.in_words * 4,
- hxl.out_words * 4);
-
- INIT_UDATA_EX(&uhw,
- (hxl.provider_in_words) ? buf + ucore.inlen : 0,
- (hxl.provider_out_words) ? response + ucore.outlen : 0,
- hxl.provider_in_words * 4,
- hxl.provider_out_words * 4);
-
- ret = uverbs_exp_cmd_table[command](file, &ucore, &uhw);
- /*
- * UnChange user buffer
- */
- if (response && copy_to_user(__DECONST(void __user *, buf), &resp1.response, sizeof(resp1.response)))
- return -EFAULT;
-
- return ret;
- } else {
- if (count < (sizeof(hdr) + sizeof(ex_hdr)))
- return -EINVAL;
-
- if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
- return -EFAULT;
-
- buf += sizeof(hdr) + sizeof(ex_hdr);
-
- if ((hdr->in_words + ex_hdr.provider_in_words) * 8 != count)
- return -EINVAL;
-
- if (ex_hdr.response) {
- if (!hdr->out_words && !ex_hdr.provider_out_words)
- return -EINVAL;
- } else {
- if (hdr->out_words || ex_hdr.provider_out_words)
- return -EINVAL;
- }
-
- INIT_UDATA_EX(&ucore,
- (hdr->in_words) ? buf : 0,
- (unsigned long)ex_hdr.response,
- hdr->in_words * 8,
- hdr->out_words * 8);
-
- INIT_UDATA_EX(&uhw,
- (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
- (ex_hdr.provider_out_words) ? ex_hdr.response + ucore.outlen : 0,
- ex_hdr.provider_in_words * 8,
- ex_hdr.provider_out_words * 8);
-
- return uverbs_exp_cmd_table[command](file, &ucore, &uhw);
- }
-}
-
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_device *dev = file->device->ib_dev;
struct ib_uverbs_cmd_hdr hdr;
- struct timespec ts1;
- struct timespec ts2;
- ktime_t t1, t2, delta;
- s64 ds;
- ssize_t ret;
- u64 dividend;
- u32 divisor;
- __u32 flags;
__u32 command;
- int legacy_ex_cmd = 0;
+ int exp_cmd;
size_t written_count = count;
+ __u32 flags;
+ int srcu_key;
+ ssize_t ret;
- if (count < sizeof hdr)
+ if (count < sizeof hdr) {
+ pr_debug("ib_uverbs_write: header too short\n");
return -EINVAL;
+ }
if (copy_from_user(&hdr, buf, sizeof hdr))
return -EFAULT;
- /*
- * For BWD compatibility change old style extension verbs commands
- * to their equivalent experimental command.
- */
- if ((hdr.command >= IB_USER_VERBS_LEGACY_CMD_FIRST) &&
- (hdr.command <= IB_USER_VERBS_LEGACY_EX_CMD_LAST)) {
- hdr.command += IB_USER_VERBS_EXP_CMD_FIRST -
- IB_USER_VERBS_LEGACY_CMD_FIRST;
- legacy_ex_cmd = 1;
- }
-
flags = (hdr.command &
IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+ exp_cmd = !flags && (command >= IB_USER_VERBS_EXP_CMD_FIRST);
- ktime_get_ts(&ts1);
- if (!flags && (command >= IB_USER_VERBS_EXP_CMD_FIRST)) {
- ret = ib_uverbs_exp_handle_cmd(file, buf, dev, &hdr, count, legacy_ex_cmd);
- } else if (!flags) {
+ srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
+ if (file->device->disassociated) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if (!flags && !exp_cmd) {
if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
- !uverbs_cmd_table[command])
- return -EINVAL;
+ !uverbs_cmd_table[command]) {
+ pr_debug("ib_uverbs_write: unexpected command\n");
+ ret = -EINVAL;
+ goto out;
+ }
if (!file->ucontext &&
- command != IB_USER_VERBS_CMD_GET_CONTEXT)
- return -EINVAL;
+ command != IB_USER_VERBS_CMD_GET_CONTEXT) {
+ pr_debug("ib_uverbs_write: invalid context\n");
+ ret = -EINVAL;
+ goto out;
+ }
- if (!(dev->uverbs_cmd_mask & (1ull << command)))
- return -ENOSYS;
+ if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command))) {
+ pr_debug("ib_uverbs_write: command not support by the device\n");
+ ret = -ENOSYS;
+ goto out;
+ }
- if (hdr.in_words * 4 != count)
- return -EINVAL;
+ if (hdr.in_words * 4 != count) {
+ pr_debug("ib_uverbs_write: header input length doesn't match written length\n");
+ ret = -EINVAL;
+ goto out;
+ }
ret = uverbs_cmd_table[command](file,
- buf + sizeof(hdr),
- hdr.in_words * 4,
- hdr.out_words * 4);
- } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+ buf + sizeof(hdr),
+ hdr.in_words * 4,
+ hdr.out_words * 4);
+
+ } else if ((flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) || exp_cmd) {
+ struct ib_uverbs_ex_cmd_hdr ex_hdr;
struct ib_udata ucore;
struct ib_udata uhw;
- struct ib_uverbs_ex_cmd_hdr ex_hdr;
+ int arr_size;
+ uverbs_ex_cmd *cmd_tbl;
+ u64 cmd_mask;
+
+ if (exp_cmd) {
+ command = hdr.command - IB_USER_VERBS_EXP_CMD_FIRST;
+ arr_size = ARRAY_SIZE(uverbs_exp_cmd_table);
+ cmd_tbl = uverbs_exp_cmd_table;
+ cmd_mask = dev->uverbs_exp_cmd_mask;
+ } else {
+ arr_size = ARRAY_SIZE(uverbs_ex_cmd_table);
+ cmd_tbl = uverbs_ex_cmd_table;
+ cmd_mask = dev->uverbs_ex_cmd_mask;
+ }
if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
- IB_USER_VERBS_CMD_COMMAND_MASK))
- return -EINVAL;
+ IB_USER_VERBS_CMD_COMMAND_MASK)) {
+ pr_debug("ib_uverbs_write: extended command invalid opcode\n");
+ ret = -EINVAL;
+ goto out;
+ }
- if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
- !uverbs_ex_cmd_table[command])
- return -EINVAL;
+ if (command >= arr_size || !cmd_tbl[command]) {
+ pr_debug("ib_uverbs_write: invalid extended command\n");
+ ret = -EINVAL;
+ goto out;
+ }
- if (!file->ucontext)
- return -EINVAL;
+ if (!file->ucontext) {
+ pr_debug("ib_uverbs_write: invalid context in extended command\n");
+ ret = -EINVAL;
+ goto out;
+ }
- if (!(dev->uverbs_ex_cmd_mask & (1ull << command)))
- return -ENOSYS;
+ if (!(cmd_mask & (1ull << command))) {
+ pr_debug("ib_uverbs_write: extended command not supported by driver\n");
+ ret = -ENOSYS;
+ goto out;
+ }
- if (count < (sizeof(hdr) + sizeof(ex_hdr)))
- return -EINVAL;
+ if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
+ pr_debug("ib_uverbs_write: ex header input length doesn't match written length\n");
+ ret = -EINVAL;
+ goto out;
+ }
- if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
- return -EFAULT;
+ if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
+ ret = -EFAULT;
+ goto out;
+ }
count -= sizeof(hdr) + sizeof(ex_hdr);
buf += sizeof(hdr) + sizeof(ex_hdr);
- if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
- return -EINVAL;
+ if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
+ pr_debug("ib_uverbs_write: extended command doesn't match written length\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (ex_hdr.cmd_hdr_reserved) {
+ ret = -EINVAL;
+ goto out;
+ }
if (ex_hdr.response) {
- if (!hdr.out_words && !ex_hdr.provider_out_words)
- return -EINVAL;
+ if (!hdr.out_words && !ex_hdr.provider_out_words) {
+ pr_debug("ib_uverbs_write: got response pointer to a zero length buffer\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+/*
+ if (!access_ok(VERIFY_WRITE,
+ (void __user *) (unsigned long) ex_hdr.response,
+ (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
+ ret = -EFAULT;
+ goto out;
+ }
+*/
} else {
- if (hdr.out_words || ex_hdr.provider_out_words)
- return -EINVAL;
+ if (hdr.out_words || ex_hdr.provider_out_words) {
+ pr_debug("ib_uverbs_write: got NULL response pointer but non-zero output length\n");
+ ret = -EINVAL;
+ goto out;
+ }
}
INIT_UDATA_EX(&ucore,
@@ -974,84 +909,78 @@
(ex_hdr.provider_out_words) ? ex_hdr.response + ucore.outlen : 0,
ex_hdr.provider_in_words * 8,
ex_hdr.provider_out_words * 8);
+ if (exp_cmd) {
+ ucore.src = IB_UDATA_EXP_CMD;
+ uhw.src = IB_UDATA_EXP_CMD;
+ }
- ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);
-
- if (ret)
- return ret;
+ ret = cmd_tbl[command](file, &ucore, &uhw);
+ if (!ret)
+ ret = written_count;
- return written_count;
+ goto out;
} else {
- return -EFAULT;
+ ret = -EFAULT;
+ goto out;
}
- if ((dev->cmd_perf & (COMMAND_INFO_MASK - 1)) == hdr.command) {
- ktime_get_ts(&ts2);
- t1 = timespec_to_ktime(ts1);
- t2 = timespec_to_ktime(ts2);
- delta = ktime_sub(t2, t1);
- ds = ktime_to_ns(delta);
- spin_lock(&dev->cmd_perf_lock);
- dividend = dev->cmd_avg * dev->cmd_n + ds;
- ++dev->cmd_n;
- divisor = dev->cmd_n;
- do_div(dividend, divisor);
- dev->cmd_avg = dividend;
- spin_unlock(&dev->cmd_perf_lock);
- if (dev->cmd_perf & COMMAND_INFO_MASK) {
- pr_info("%s: %s execution time = %lld nsec\n",
- file->device->ib_dev->name,
- verbs_cmd_str(hdr.command),
- (long long)ds);
- }
- }
+out:
+ srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return ret;
}
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct ib_uverbs_file *file = filp->private_data;
+ int ret = 0;
+ int srcu_key;
+
+ srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
+ if (file->device->disassociated) {
+ ret = -EIO;
+ goto out;
+ }
if (!file->ucontext)
- return -ENODEV;
+ ret = -ENODEV;
else
- return file->device->ib_dev->mmap(file->ucontext, vma);
+ ret = file->device->ib_dev->mmap(file->ucontext, vma);
+out:
+ srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
+ return ret;
}
-/* XXX Not supported in FreeBSD */
-#if 0
-static unsigned long ib_uverbs_get_unmapped_area(struct file *filp,
- unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+
+static long ib_uverbs_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg)
{
struct ib_uverbs_file *file = filp->private_data;
+ long ret = 0;
+ int srcu_key;
- if (!file->ucontext)
- return -ENODEV;
- else {
- if (!file->device->ib_dev->get_unmapped_area)
- return current->mm->get_unmapped_area(filp, addr, len,
- pgoff, flags);
+ srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- return file->device->ib_dev->get_unmapped_area(filp, addr, len,
- pgoff, flags);
+ if (file->device->disassociated) {
+ ret = -EIO;
+ goto out;
}
-}
-#endif
-
-static long ib_uverbs_ioctl(struct file *filp,
- unsigned int cmd, unsigned long arg)
-{
- struct ib_uverbs_file *file = filp->private_data;
- if (!file->device->ib_dev->ioctl)
- return -ENOTSUPP;
+ if (!file->device->ib_dev->ioctl) {
+ ret = -ENOTSUPP;
+ goto out;
+ }
- if (!file->ucontext)
- return -ENODEV;
- else
+ if (!file->ucontext) {
+ ret = -ENODEV;
+ goto out;
+ } else {
/* provider should provide it's own locking mechanism */
- return file->device->ib_dev->ioctl(file->ucontext, cmd, arg);
+ ret = file->device->ib_dev->ioctl(file->ucontext, cmd, arg);
+ }
+
+out:
+ srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
+ return ret;
}
/*
@@ -1069,6 +998,7 @@
struct ib_uverbs_device *dev;
struct ib_uverbs_file *file;
int ret;
+ int module_dependent;
dev = container_of(inode->i_cdev->si_drv1, struct ib_uverbs_device, cdev);
if (dev)
@@ -1076,15 +1006,31 @@
else
return -ENXIO;
- if (!try_module_get(dev->ib_dev->owner)) {
- ret = -ENODEV;
+ mutex_lock(&dev->disassociate_mutex);
+ if (dev->disassociated) {
+ ret = -EIO;
goto err;
}
- file = kmalloc(sizeof *file, GFP_KERNEL);
+ /* If the IB device supports disassociating the ucontext, there is no
+ * hard dependency between the uverbs device and its low-level device.
+ */
+ module_dependent = !(dev->flags & UVERBS_FLAG_DISASSOCIATE);
+
+ if (module_dependent) {
+ if (!try_module_get(dev->ib_dev->owner)) {
+ ret = -ENODEV;
+ goto err;
+ }
+ }
+
+ file = kzalloc(sizeof *file, GFP_KERNEL);
if (!file) {
ret = -ENOMEM;
- goto err_module;
+ if (module_dependent)
+ goto err_module;
+
+ goto err;
}
file->device = dev;
@@ -1094,6 +1040,8 @@
mutex_init(&file->mutex);
filp->private_data = file;
+ list_add_tail(&file->list, &dev->uverbs_file_list);
+ mutex_unlock(&dev->disassociate_mutex);
return nonseekable_open(inode, filp);
@@ -1101,6 +1049,7 @@
module_put(dev->ib_dev->owner);
err:
+ mutex_unlock(&dev->disassociate_mutex);
kref_put(&dev->ref, ib_uverbs_release_dev);
return ret;
}
@@ -1108,9 +1057,26 @@
static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_file *file = filp->private_data;
+ struct ib_ucontext *ucontext = NULL;
+ int srcu_key;
+
+ srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
+ mutex_lock(&file->device->disassociate_mutex);
+ if (!file->device->disassociated) {
+ /* No need to remove from the list once already disassociated.
+ * Trying to do so might race with ib_uverbs_free_hw_resources,
+ * as the mutex is not held at that time.
+ */
+ list_del(&file->list);
+ ucontext = file->ucontext;
+ }
+
+ mutex_unlock(&file->device->disassociate_mutex);
- ib_uverbs_cleanup_ucontext(file, file->ucontext);
+ if (ucontext)
+ ib_uverbs_cleanup_ucontext(file, ucontext);
+ srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
if (file->async_file)
kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
@@ -1120,25 +1086,21 @@
}
static const struct file_operations uverbs_fops = {
- .owner = THIS_MODULE,
- .write = ib_uverbs_write,
- .open = ib_uverbs_open,
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
+ .open = ib_uverbs_open,
.release = ib_uverbs_close,
.llseek = no_llseek,
.unlocked_ioctl = ib_uverbs_ioctl,
};
static const struct file_operations uverbs_mmap_fops = {
- .owner = THIS_MODULE,
- .write = ib_uverbs_write,
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
.mmap = ib_uverbs_mmap,
- .open = ib_uverbs_open,
+ .open = ib_uverbs_open,
.release = ib_uverbs_close,
.llseek = no_llseek,
-/* XXX Not supported in FreeBSD */
-#if 0
- .get_unmapped_area = ib_uverbs_get_unmapped_area,
-#endif
.unlocked_ioctl = ib_uverbs_ioctl,
};
@@ -1160,18 +1122,6 @@
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
-static ssize_t show_dev_ref_cnt(struct device *device,
- struct device_attribute *attr, char *buf)
-{
- struct ib_uverbs_device *dev = dev_get_drvdata(device);
-
- if (!dev)
- return -ENODEV;
-
- return sprintf(buf, "%d\n", atomic_read(&dev->ref.refcount));
-}
-static DEVICE_ATTR(ref_cnt, S_IRUGO, show_dev_ref_cnt, NULL);
-
static ssize_t show_dev_abi_version(struct device *device,
struct device_attribute *attr, char *buf)
{
@@ -1186,7 +1136,7 @@
static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
+ return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
}
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
@@ -1229,7 +1179,7 @@
return -ENODEV;
return sprintf(buf, "0x%04x\n",
- ((struct pci_dev *)dev->ib_dev->dma_device)->device);
+ ((struct pci_dev *)dev->ib_dev->dma_device)->device);
}
static DEVICE_ATTR(device, S_IRUGO, show_dev_device, NULL);
@@ -1242,7 +1192,7 @@
return -ENODEV;
return sprintf(buf, "0x%04x\n",
- ((struct pci_dev *)dev->ib_dev->dma_device)->vendor);
+ ((struct pci_dev *)dev->ib_dev->dma_device)->vendor);
}
static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL);
@@ -1255,8 +1205,8 @@
};
static struct attribute_group device_group = {
- .name = "device",
- .attrs = device_attrs
+ .name = "device",
+ .attrs = device_attrs
};
static void ib_uverbs_add_one(struct ib_device *device)
@@ -1264,6 +1214,7 @@
int devnum;
dev_t base;
struct ib_uverbs_device *uverbs_dev;
+ int ret;
if (!device->alloc_ucontext)
return;
@@ -1276,6 +1227,13 @@
init_completion(&uverbs_dev->comp);
uverbs_dev->xrcd_tree = RB_ROOT;
mutex_init(&uverbs_dev->xrcd_tree_mutex);
+ mutex_init(&uverbs_dev->disassociate_mutex);
+ ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
+ if (ret)
+ goto err_init;
+
+ INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
+ INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
spin_lock(&map_lock);
devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -1283,7 +1241,7 @@
spin_unlock(&map_lock);
devnum = find_overflow_devnum();
if (devnum < 0)
- goto err;
+ goto err;
spin_lock(&map_lock);
uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
@@ -1314,13 +1272,14 @@
if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
goto err_class;
- if (device_create_file(uverbs_dev->dev, &dev_attr_ref_cnt))
- goto err_class;
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
goto err_class;
if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group))
goto err_class;
+ if (device->disassociate_ucontext)
+ uverbs_dev->flags |= UVERBS_FLAG_DISASSOCIATE;
+
ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
@@ -1336,15 +1295,71 @@
clear_bit(devnum, overflow_map);
err:
+ cleanup_srcu_struct(&uverbs_dev->disassociate_srcu);
+
+err_init:
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
wait_for_completion(&uverbs_dev->comp);
kfree(uverbs_dev);
return;
}
+static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev)
+{
+ struct ib_uverbs_file *file, *tmp_file;
+ struct ib_uverbs_event_file *event_file, *tmp_event_file;
+ struct ib_event event;
+
+ mutex_lock(&uverbs_dev->disassociate_mutex);
+ uverbs_dev->disassociated = 1;
+ /* We must release the mutex before going ahead and calling
+ * disassociate_ucontext, as a nested call to uverbs_close might
+ * result from freeing the resources (e.g. mmput).
+ * In addition, we take an extra ref count on the files to prevent
+ * them from being freed by a parallel file close, whether from
+ * another task or from an event generated internally by this one.
+ */
+ list_for_each_entry(file, &uverbs_dev->uverbs_file_list, list)
+ kref_get(&file->ref);
+ list_for_each_entry(event_file, &uverbs_dev->uverbs_events_file_list, list)
+ kref_get(&event_file->ref);
+ mutex_unlock(&uverbs_dev->disassociate_mutex);
+
+ /* wait for in-flight commands to terminate */
+ synchronize_srcu(&uverbs_dev->disassociate_srcu);
+ event.event = IB_EVENT_DEVICE_FATAL;
+ event.element.port_num = 0;
+ event.device = uverbs_dev->ib_dev;
+
+ list_for_each_entry(file, &uverbs_dev->uverbs_file_list, list) {
+ ib_uverbs_event_handler(&file->event_handler, &event);
+ uverbs_dev->ib_dev->disassociate_ucontext(file->ucontext);
+ ib_uverbs_cleanup_ucontext(file, file->ucontext);
+ }
+
+ list_for_each_entry(event_file, &uverbs_dev->uverbs_events_file_list, list) {
+ if (event_file->is_async) {
+ /* The ib_device is freed once remove_one finishes, so the
+ * event handler must be unregistered before that happens.
+ */
+ ib_unregister_event_handler(&event_file->uverbs_file->event_handler);
+ }
+
+ wake_up_interruptible(&event_file->poll_wait);
+ kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+ }
+
+ /* we need a safe iterator as the file might be freed during the loop */
+ list_for_each_entry_safe(file, tmp_file, &uverbs_dev->uverbs_file_list, list)
+ kref_put(&file->ref, ib_uverbs_release_file);
+
+ list_for_each_entry_safe(event_file, tmp_event_file, &uverbs_dev->uverbs_events_file_list, list)
+ kref_put(&event_file->ref, ib_uverbs_release_event_file);
+}
static void ib_uverbs_remove_one(struct ib_device *device)
{
struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
+ int wait_clients = 1;
if (!uverbs_dev)
return;
@@ -1355,13 +1370,33 @@
cdev_del(&uverbs_dev->cdev);
if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
- clear_bit(uverbs_dev->devnum, dev_map);
+ clear_bit(uverbs_dev->devnum, dev_map);
else
clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
+ if (uverbs_dev->flags & UVERBS_FLAG_DISASSOCIATE) {
+ /* We disassociate the HW resources and return immediately,
+ * without waiting for active userspace clients. Upon return the
+ * ib_device may be freed internally and is no longer valid.
+ * The uverbs_device remains available; once all clients close
+ * their files, its ref count drops to zero and its resources
+ * are freed.
+ * Note: at this point no new files can be opened on the cdev,
+ * as it has been deleted, but active clients can still issue
+ * commands and close their open files.
+ */
+ ib_uverbs_free_hw_resources(uverbs_dev);
+ wait_clients = 0;
+ /* ib device can no longer be accessed. It is freed when this procedure returns. */
+ uverbs_dev->ib_dev = NULL;
+ }
+ /* The ref count taken in ib_uverbs_add_one is put back in both modes. */
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
- wait_for_completion(&uverbs_dev->comp);
- kfree(uverbs_dev);
+ if (wait_clients) {
+ wait_for_completion(&uverbs_dev->comp);
+ cleanup_srcu_struct(&uverbs_dev->disassociate_srcu);
+ kfree(uverbs_dev);
+ }
}
static char *uverbs_devnode(struct device *dev, umode_t *mode)
@@ -1431,5 +1466,5 @@
idr_destroy(&ib_uverbs_srq_idr);
}
-module_init(ib_uverbs_init);
+module_init_order(ib_uverbs_init, SI_ORDER_THIRD);
module_exit(ib_uverbs_cleanup);
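The close/remove interplay introduced above (a "disassociated" flag checked under a mutex, extra file references, then a grace period before tearing down HW resources) can be distilled into a small userspace analogue. The sketch below is purely illustrative and not part of the patch: it uses a plain pthread mutex in place of disassociate_mutex, all names are hypothetical, and the SRCU grace period that synchronize_srcu() provides in the real code is only noted in a comment.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_uverbs_dev {
	pthread_mutex_t lock;	/* stands in for disassociate_mutex */
	bool disassociated;
	int open_files;
};

/* close path: detach from the device only if it is still associated */
static void fake_close(struct fake_uverbs_dev *d)
{
	bool do_cleanup = false;

	pthread_mutex_lock(&d->lock);
	if (!d->disassociated) {
		d->open_files--;	/* analogous to list_del(&file->list) */
		do_cleanup = true;
	}
	pthread_mutex_unlock(&d->lock);

	if (do_cleanup)
		printf("file cleaned up its own context\n");
}

/* remove path: mark the device gone, then tear everything down once */
static void fake_remove(struct fake_uverbs_dev *d)
{
	pthread_mutex_lock(&d->lock);
	d->disassociated = true;
	pthread_mutex_unlock(&d->lock);

	/* the real code waits here (synchronize_srcu) for in-flight
	 * commands before freeing the HW resources */
	printf("HW resources freed, %d file(s) still open\n", d->open_files);
}

int main(void)
{
	struct fake_uverbs_dev d = { PTHREAD_MUTEX_INITIALIZER, false, 2 };

	fake_close(&d);		/* normal close while still associated */
	fake_remove(&d);	/* hot-unplug style removal */
	fake_close(&d);		/* late close skips the per-device cleanup */
	return 0;
}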
Index: sys/ofed/drivers/infiniband/core/uverbs_marshall.c
===================================================================
--- sys/ofed/drivers/infiniband/core/uverbs_marshall.c
+++ sys/ofed/drivers/infiniband/core/uverbs_marshall.c
@@ -140,5 +140,10 @@
dst->packet_life_time = src->packet_life_time;
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
+
+ memset(dst->dmac, 0, sizeof(dst->dmac));
+ dst->net = NULL;
+ dst->ifindex = 0;
+ dst->gid_type = IB_GID_TYPE_IB;
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
Index: sys/ofed/drivers/infiniband/core/verbs.c
===================================================================
--- sys/ofed/drivers/infiniband/core/verbs.c
+++ sys/ofed/drivers/infiniband/core/verbs.c
@@ -39,14 +39,22 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/module.h>
+#include <linux/rcupdate.h>
#include <linux/string.h>
#include <linux/slab.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+#include <netinet/ip.h>
+#include <machine/in_cksum.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
-int ib_rate_to_mult(enum ib_rate rate)
+#include "core_priv.h"
+
+__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
{
switch (rate) {
case IB_RATE_2_5_GBPS: return 1;
@@ -63,7 +71,7 @@
}
EXPORT_SYMBOL(ib_rate_to_mult);
-enum ib_rate mult_to_ib_rate(int mult)
+__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
{
switch (mult) {
case 1: return IB_RATE_2_5_GBPS;
@@ -80,7 +88,7 @@
}
EXPORT_SYMBOL(mult_to_ib_rate);
-int ib_rate_to_mbps(enum ib_rate rate)
+__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
{
switch (rate) {
case IB_RATE_2_5_GBPS: return 2500;
@@ -105,7 +113,7 @@
}
EXPORT_SYMBOL(ib_rate_to_mbps);
-enum rdma_transport_type
+__attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type)
{
switch (node_type) {
@@ -115,8 +123,6 @@
return RDMA_TRANSPORT_IB;
case RDMA_NODE_RNIC:
return RDMA_TRANSPORT_IWARP;
- case RDMA_NODE_MIC:
- return RDMA_TRANSPORT_SCIF;
default:
BUG();
return 0;
@@ -134,8 +140,6 @@
return IB_LINK_LAYER_INFINIBAND;
case RDMA_TRANSPORT_IWARP:
return IB_LINK_LAYER_ETHERNET;
- case RDMA_TRANSPORT_SCIF:
- return IB_LINK_LAYER_SCIF;
default:
return IB_LINK_LAYER_UNSPECIFIED;
}
@@ -188,6 +192,122 @@
}
EXPORT_SYMBOL(ib_create_ah);
+int ib_get_grh_header_version(const void *h)
+{
+ const struct ip *ip4h = (struct ip *)(h + 20);
+ struct ip ip4h_checked;
+ const struct ip6_hdr *ip6h = (struct ip6_hdr *)h;
+
+ if (((ip6h->ip6_vfc & IPV6_VERSION_MASK) >> 4) != 6)
+ return (ip4h->ip_v == 4) ? 4 : 0;
+ /* version may be 6 or 4 */
+ if (ip4h->ip_hl != 5) /* IPv4 header length must be 5 for RR */
+ return 6;
+ /* Verify checksum. We can't write on scattered buffers, so we
+ * need to copy to a temporary buffer.
+ */
+ memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
+ ip4h_checked.ip_sum = 0;
+ ip4h_checked.ip_sum = in_cksum_hdr(&ip4h_checked);
+ /* if the IPv4 header checksum is OK, believe it */
+ if (ip4h->ip_sum == ip4h_checked.ip_sum)
+ return 4;
+ return 6;
+}
+EXPORT_SYMBOL(ib_get_grh_header_version);
+
+static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
+ u8 port_num,
+ const struct ib_grh *grh)
+{
+ int grh_version;
+
+ if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND)
+ return RDMA_NETWORK_IB;
+
+ grh_version = ib_get_grh_header_version(grh);
+
+ if (grh_version == 4)
+ return RDMA_NETWORK_IPV4;
+
+ if (grh->next_hdr == IPPROTO_UDP || grh->next_hdr == 0xfe)
+ return RDMA_NETWORK_IPV6;
+
+ return RDMA_NETWORK_IB;
+}
+
+struct find_gid_index_context {
+ u16 vlan_id;
+ enum ib_gid_type gid_type;
+};
+
+static bool find_gid_index(const union ib_gid *gid,
+ const struct ib_gid_attr *gid_attr,
+ void *context)
+{
+ u16 tag;
+ bool dev_supports_vlan = (VLAN_TAG(gid_attr->ndev, &tag) == 0);
+ struct find_gid_index_context *ctx =
+ (struct find_gid_index_context *)context;
+
+ if (ctx->gid_type != gid_attr->gid_type)
+ return false;
+
+ if (!!(ctx->vlan_id != 0xffff) == !dev_supports_vlan ||
+ (dev_supports_vlan && tag != ctx->vlan_id))
+ return false;
+
+ return true;
+}
+
+static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
+ u16 vlan_id, union ib_gid *sgid,
+ enum ib_gid_type gid_type,
+ u16 *gid_index)
+{
+ struct find_gid_index_context context = {.vlan_id = vlan_id,
+ .gid_type = gid_type};
+
+ return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
+ &context, gid_index);
+}
+
+int ib_get_gids_from_grh(struct ib_grh *grh, enum rdma_network_type net_type,
+ union ib_gid *sgid, union ib_gid *dgid)
+{
+ union rdma_network_hdr *l3grh;
+ struct sockaddr_in src_in;
+ struct sockaddr_in dst_in;
+ __be32 src_saddr, dst_saddr;
+
+ if (!sgid || !dgid)
+ return -EINVAL;
+
+ if (net_type == RDMA_NETWORK_IPV4) {
+ l3grh = (union rdma_network_hdr *)
+ ((u8 *)grh + 20);
+ memcpy(&src_in.sin_addr.s_addr,
+ &l3grh->roce4grh.ip_src.s_addr, 4);
+ memcpy(&dst_in.sin_addr.s_addr,
+ &l3grh->roce4grh.ip_dst.s_addr, 4);
+ src_saddr = src_in.sin_addr.s_addr;
+ dst_saddr = dst_in.sin_addr.s_addr;
+ ipv6_addr_set_v4mapped(src_saddr,
+ (struct in6_addr *)sgid);
+ ipv6_addr_set_v4mapped(dst_saddr,
+ (struct in6_addr *)dgid);
+ return 0;
+ } else if (net_type == RDMA_NETWORK_IPV6 ||
+ net_type == RDMA_NETWORK_IB) {
+ *dgid = grh->dgid;
+ *sgid = grh->sgid;
+ return 0;
+ } else
+ return -EINVAL;
+}
+EXPORT_SYMBOL(ib_get_gids_from_grh);
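A hedged sketch of how a receive path might combine the two exported helpers above; it mirrors what ib_init_ah_from_wc() does further down, the function name is hypothetical, and the wc/grh are assumed to come from the caller's completion handling. It is illustrative only, not part of the patch.

static int example_gids_from_wc(struct ib_device *device, u8 port_num,
				struct ib_wc *wc, struct ib_grh *grh,
				union ib_gid *sgid, union ib_gid *dgid)
{
	enum rdma_network_type net_type = RDMA_NETWORK_IB;

	if (rdma_port_get_link_layer(device, port_num) ==
	    IB_LINK_LAYER_ETHERNET) {
		if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
			net_type = wc->network_hdr_type;
		else if (ib_get_grh_header_version(grh) == 4)
			net_type = RDMA_NETWORK_IPV4;
		else
			net_type = RDMA_NETWORK_IPV6;
	}

	/* for RoCE v2 over IPv4 the GIDs come back as v4-mapped addresses */
	return ib_get_gids_from_grh(grh, net_type, sgid, dgid);
}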
+
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
struct ib_grh *grh, struct ib_ah_attr *ah_attr)
{
@@ -196,28 +316,50 @@
int ret;
int is_eth = (rdma_port_get_link_layer(device, port_num) ==
IB_LINK_LAYER_ETHERNET);
+ enum rdma_network_type net_type = RDMA_NETWORK_IB;
+ enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ union ib_gid dgid;
+ union ib_gid sgid;
memset(ah_attr, 0, sizeof *ah_attr);
if (is_eth) {
+ if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
+ net_type = wc->network_hdr_type;
+ else
+ net_type = ib_get_net_type_by_grh(device, port_num, grh);
+ gid_type = ib_network_to_gid_type(net_type, grh);
+ }
+ ret = ib_get_gids_from_grh(grh, net_type, &sgid, &dgid);
+ if (ret)
+ return ret;
+
+ if (is_eth) {
+ u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
+ wc->vlan_id : 0xffff;
+
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- if (wc->wc_flags & IB_WC_WITH_SMAC &&
- wc->wc_flags & IB_WC_WITH_VLAN) {
- memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
- ah_attr->vlan_id = wc->vlan_id;
- } else {
+ if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
+ !(wc->wc_flags & IB_WC_WITH_VLAN)) {
u32 scope_id = rdma_get_ipv6_scope_id(device, port_num);
- ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
- ah_attr->dmac, &ah_attr->vlan_id,
+ ret = rdma_addr_find_dmac_by_grh(&dgid, &sgid,
+ ah_attr->dmac,
+ wc->wc_flags & IB_WC_WITH_VLAN ?
+ NULL : &vlan_id,
scope_id);
if (ret)
return ret;
}
- } else {
- ah_attr->vlan_id = 0xffff;
- }
+ ret = get_sgid_index_from_eth(device, port_num, vlan_id,
+ &dgid, gid_type, &gid_index);
+ if (ret)
+ return ret;
+
+ if (wc->wc_flags & IB_WC_WITH_SMAC)
+ memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
+ }
ah_attr->dlid = wc->slid;
ah_attr->sl = wc->sl;
@@ -226,12 +368,16 @@
if (wc->wc_flags & IB_WC_GRH) {
ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = grh->sgid;
+ ah_attr->grh.dgid = sgid;
- ret = ib_find_cached_gid(device, &grh->dgid, &port_num,
- &gid_index);
- if (ret)
- return ret;
+ if (!is_eth) {
+ ret = ib_find_cached_gid_by_port(device, &dgid,
+ IB_GID_TYPE_IB,
+ port_num, NULL, 0,
+ &gid_index);
+ if (ret)
+ return ret;
+ }
ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
@@ -273,6 +419,15 @@
}
EXPORT_SYMBOL(ib_query_ah);
+int ib_query_values(struct ib_device *device,
+ int q_values, struct ib_device_values *values)
+{
+ return device->query_values ?
+ device->query_values(device, q_values, values) :
+ -ENOSYS;
+}
+EXPORT_SYMBOL(ib_query_values);
+
int ib_destroy_ah(struct ib_ah *ah)
{
struct ib_pd *pd;
@@ -291,7 +446,7 @@
struct ib_srq *ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *srq_init_attr)
-{
+{
struct ib_srq *srq;
if (!pd->device->create_srq)
@@ -311,7 +466,7 @@
srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq;
atomic_inc(&srq->ext.xrc.xrcd->usecnt);
atomic_inc(&srq->ext.xrc.cq->usecnt);
- }
+ }
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
}
@@ -338,14 +493,6 @@
}
EXPORT_SYMBOL(ib_query_srq);
-int ib_query_values(struct ib_device *device,
- int q_values, struct ib_device_values *values)
-{
- return device->query_values ?
- device->query_values(device, q_values, values) : -ENOSYS;
-}
-EXPORT_SYMBOL(ib_query_values);
-
int ib_destroy_srq(struct ib_srq *srq)
{
struct ib_pd *pd;
@@ -360,7 +507,7 @@
pd = srq->pd;
srq_type = srq->srq_type;
if (srq_type == IB_SRQT_XRC) {
- xrcd = srq->ext.xrc.xrcd;
+ xrcd = srq->ext.xrc.xrcd;
cq = srq->ext.xrc.cq;
}
@@ -384,9 +531,6 @@
struct ib_qp *qp = context;
unsigned long flags;
- /* The code below must be synced with deletions of existing qps (ib_close_qp) --
- * because a qp from the list may be closed during the scan, resulting in a kernel Oops.
- */
spin_lock_irqsave(&qp->device->event_handler_lock, flags);
list_for_each_entry(event->element.qp, &qp->open_list, open_list)
if (event->element.qp->event_handler)
@@ -461,7 +605,7 @@
if (!IS_ERR(qp)) {
qp->device = device;
qp->real_qp = qp;
- qp->uobject = NULL;
+ qp->uobject = NULL;
qp->qp_type = qp_init_attr->qp_type;
atomic_set(&qp->usecnt, 0);
@@ -483,17 +627,17 @@
else
real_qp->device->destroy_qp(real_qp);
} else {
- qp->event_handler = qp_init_attr->event_handler;
- qp->qp_context = qp_init_attr->qp_context;
+ qp->event_handler = qp_init_attr->event_handler;
+ qp->qp_context = qp_init_attr->qp_context;
if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
qp->recv_cq = NULL;
qp->srq = NULL;
} else {
qp->recv_cq = qp_init_attr->recv_cq;
- atomic_inc(&qp_init_attr->recv_cq->usecnt);
+ atomic_inc(&qp_init_attr->recv_cq->usecnt);
qp->srq = qp_init_attr->srq;
if (qp->srq)
- atomic_inc(&qp_init_attr->srq->usecnt);
+ atomic_inc(&qp_init_attr->srq->usecnt);
}
qp->pd = pd;
@@ -512,9 +656,7 @@
static const struct {
int valid;
enum ib_qp_attr_mask req_param[IB_QPT_MAX];
- enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX];
enum ib_qp_attr_mask opt_param[IB_QPT_MAX];
- enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
@@ -531,13 +673,13 @@
[IB_QPT_RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
- [IB_QPT_DC_INI] = (IB_QP_PKEY_INDEX |
+ [IB_EXP_QPT_DC_INI] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS |
- IB_QP_DC_KEY),
+ IB_QP_DC_KEY ),
[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_ACCESS_FLAGS),
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
@@ -547,8 +689,13 @@
IB_QP_QKEY),
},
.opt_param = {
- [IB_QPT_UD] = IB_QP_GROUP_RSS,
- [IB_QPT_RAW_PACKET] = IB_QP_GROUP_RSS
+ [IB_QPT_UD] = (IB_QP_GROUP_RSS |
+ IB_QP_FLOW_ENTROPY),
+ [IB_QPT_RAW_PACKET] = IB_QP_GROUP_RSS,
+ [IB_QPT_RC] = IB_QP_FLOW_ENTROPY,
+ [IB_QPT_UC] = IB_QP_FLOW_ENTROPY,
+ [IB_QPT_XRC_INI] = IB_QP_FLOW_ENTROPY,
+ [IB_QPT_XRC_TGT] = IB_QP_FLOW_ENTROPY,
}
},
},
@@ -567,7 +714,7 @@
[IB_QPT_RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
- [IB_QPT_DC_INI] = (IB_QP_PKEY_INDEX |
+ [IB_EXP_QPT_DC_INI] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX |
@@ -595,7 +742,7 @@
IB_QP_RQ_PSN |
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
- [IB_QPT_DC_INI] = (IB_QP_PATH_MTU |
+ [IB_EXP_QPT_DC_INI] = (IB_QP_PATH_MTU |
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
[IB_QPT_XRC_INI] = (IB_QP_AV |
@@ -609,12 +756,6 @@
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
},
- .req_param_add_eth = {
- [IB_QPT_RC] = (IB_QP_SMAC),
- [IB_QPT_UC] = (IB_QP_SMAC),
- [IB_QPT_XRC_INI] = (IB_QP_SMAC),
- [IB_QPT_XRC_TGT] = (IB_QP_SMAC)
- },
.opt_param = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
@@ -624,36 +765,18 @@
[IB_QPT_RC] = (IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX),
- [IB_QPT_DC_INI] = (IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_PKEY_INDEX),
[IB_QPT_XRC_INI] = (IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX),
[IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_PKEY_INDEX),
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
- [IB_QPT_RAW_PACKET] = IB_QP_AV,
},
- .opt_param_add_eth = {
- [IB_QPT_RC] = (IB_QP_ALT_SMAC |
- IB_QP_VID |
- IB_QP_ALT_VID),
- [IB_QPT_UC] = (IB_QP_ALT_SMAC |
- IB_QP_VID |
- IB_QP_ALT_VID),
- [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC |
- IB_QP_VID |
- IB_QP_ALT_VID),
- [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC |
- IB_QP_VID |
- IB_QP_ALT_VID)
- }
- }
+ },
},
[IB_QPS_RTR] = {
[IB_QPS_RESET] = { .valid = 1 },
@@ -668,7 +791,7 @@
IB_QP_RNR_RETRY |
IB_QP_SQ_PSN |
IB_QP_MAX_QP_RD_ATOMIC),
- [IB_QPT_DC_INI] = (IB_QP_TIMEOUT |
+ [IB_EXP_QPT_DC_INI] = (IB_QP_TIMEOUT |
IB_QP_RETRY_CNT |
IB_QP_RNR_RETRY |
IB_QP_MAX_QP_RD_ATOMIC),
@@ -694,7 +817,7 @@
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
- [IB_QPT_DC_INI] = (IB_QP_CUR_STATE |
+ [IB_EXP_QPT_DC_INI] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
@@ -704,10 +827,10 @@
IB_QP_ACCESS_FLAGS |
IB_QP_PATH_MIG_STATE),
[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
- IB_QP_ALT_PATH |
- IB_QP_ACCESS_FLAGS |
- IB_QP_MIN_RNR_TIMER |
- IB_QP_PATH_MIG_STATE),
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
[IB_QPT_SMI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
@@ -732,7 +855,7 @@
IB_QP_ALT_PATH |
IB_QP_PATH_MIG_STATE |
IB_QP_MIN_RNR_TIMER),
- [IB_QPT_DC_INI] = (IB_QP_CUR_STATE |
+ [IB_EXP_QPT_DC_INI] = (IB_QP_CUR_STATE |
IB_QP_ACCESS_FLAGS |
IB_QP_ALT_PATH |
IB_QP_PATH_MIG_STATE |
@@ -889,13 +1012,6 @@
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
- if (ll == IB_LINK_LAYER_ETHERNET) {
- req_param |= qp_state_table[cur_state][next_state].
- req_param_add_eth[type];
- opt_param |= qp_state_table[cur_state][next_state].
- opt_param_add_eth[type];
- }
-
if ((mask & req_param) != req_param)
return 0;
@@ -906,17 +1022,74 @@
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
+int ib_resolve_eth_dmac(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+{
+ int ret = 0;
+ u8 start_port = qp->device->node_type == RDMA_NODE_IB_SWITCH ? 0 : 1;
+
+ if ((*qp_attr_mask & IB_QP_AV) &&
+ (qp_attr->ah_attr.port_num >= start_port) &&
+ (qp_attr->ah_attr.port_num < start_port + qp->device->phys_port_cnt) &&
+ (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) ==
+ IB_LINK_LAYER_ETHERNET)) {
+ if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
+ rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
+ qp_attr->ah_attr.dmac);
+ } else {
+ union ib_gid sgid;
+ struct ib_gid_attr sgid_attr;
+ u32 scope_id;
+
+ rcu_read_lock();
+ ret = ib_query_gid(qp->device,
+ qp_attr->ah_attr.port_num,
+ qp_attr->ah_attr.grh.sgid_index,
+ &sgid, &sgid_attr);
+
+ if (ret || !sgid_attr.ndev) {
+ if (!ret)
+ ret = -ENXIO;
+ rcu_read_unlock();
+ goto out;
+ }
+ if (sgid_attr.gid_type == IB_GID_TYPE_ROCE_V2 ||
+ sgid_attr.gid_type == IB_GID_TYPE_ROCE_V1_5)
+ qp_attr->ah_attr.grh.hop_limit =
+ IPV6_DEFAULT_HOPLIMIT;
+
+ dev_hold(sgid_attr.ndev);
+ scope_id = rdma_get_ipv6_scope_id(qp->device,
+ qp_attr->ah_attr.port_num);
+
+ rcu_read_unlock();
+
+ ret = rdma_addr_find_dmac_by_grh(&sgid,
+ &qp_attr->ah_attr.grh.dgid,
+ qp_attr->ah_attr.dmac,
+ NULL, scope_id);
+
+ dev_put(sgid_attr.ndev);
+ }
+ }
+out:
+ return ret;
+}
+EXPORT_SYMBOL(ib_resolve_eth_dmac);
+
+
int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
int ret;
- ret = qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
- if (!ret && (qp_attr_mask & IB_QP_PORT))
- qp->port_num = qp_attr->port_num;
+ ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
- return ret;
+ if (ret)
+ return ret;
+
+ return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);
@@ -984,6 +1157,7 @@
struct ib_pd *pd;
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
+ struct ib_rwq_ind_table *ind_tbl;
int ret;
if (atomic_read(&qp->usecnt))
@@ -992,21 +1166,25 @@
if (qp->real_qp != qp)
return __ib_destroy_shared_qp(qp);
- pd = qp->pd;
- scq = qp->send_cq;
- rcq = qp->recv_cq;
- srq = qp->srq;
+ pd = qp->pd;
+ scq = qp->send_cq;
+ rcq = qp->recv_cq;
+ srq = qp->srq;
+ ind_tbl = qp->rwq_ind_tbl;
ret = qp->device->destroy_qp(qp);
if (!ret) {
if (pd)
- atomic_dec(&pd->usecnt);
+ atomic_dec(&pd->usecnt);
if (scq)
- atomic_dec(&scq->usecnt);
+ atomic_dec(&scq->usecnt);
if (rcq)
- atomic_dec(&rcq->usecnt);
+ atomic_dec(&rcq->usecnt);
if (srq)
atomic_dec(&srq->usecnt);
+ if (ind_tbl)
+ atomic_dec(&ind_tbl->usecnt);
+
}
return ret;
@@ -1360,19 +1538,9 @@
if (!qp->device->attach_mcast)
return -ENOSYS;
-
- switch (rdma_node_get_transport(qp->device->node_type)) {
- case RDMA_TRANSPORT_IB:
- if ((gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) &&
- qp->qp_type != IB_QPT_RAW_PACKET)
- return -EINVAL;
- break;
- case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
- if (qp->qp_type != IB_QPT_RAW_PACKET)
- return -EINVAL;
- break;
- }
+ if ((gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) &&
+ qp->qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
ret = qp->device->attach_mcast(qp, gid, lid);
if (!ret)
@@ -1387,20 +1555,9 @@
if (!qp->device->detach_mcast)
return -ENOSYS;
-
- switch (rdma_node_get_transport(qp->device->node_type)) {
- case RDMA_TRANSPORT_IB:
- if ((gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) &&
- qp->qp_type != IB_QPT_RAW_PACKET)
- return -EINVAL;
- break;
- case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
-
- if (qp->qp_type != IB_QPT_RAW_PACKET)
- return -EINVAL;
- break;
- }
+ if ((gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) &&
+ qp->qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
ret = qp->device->detach_mcast(qp, gid, lid);
if (!ret)
@@ -1448,6 +1605,125 @@
}
EXPORT_SYMBOL(ib_dealloc_xrcd);
+struct ib_wq *ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *wq_attr)
+{
+ struct ib_wq *wq;
+
+ if (!pd->device->create_wq)
+ return ERR_PTR(-ENOSYS);
+
+ wq = pd->device->create_wq(pd, wq_attr, NULL);
+ if (!IS_ERR(wq)) {
+ wq->event_handler = wq_attr->event_handler;
+ wq->wq_context = wq_attr->wq_context;
+ wq->wq_type = wq_attr->wq_type;
+ wq->cq = wq_attr->cq;
+ wq->device = pd->device;
+ wq->pd = pd;
+ wq->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&wq_attr->cq->usecnt);
+ if (wq_attr->srq) {
+ wq->srq = wq_attr->srq;
+ atomic_inc(&wq_attr->srq->usecnt);
+ }
+ atomic_set(&wq->usecnt, 0);
+ }
+ return wq;
+}
+EXPORT_SYMBOL(ib_create_wq);
+
+int ib_destroy_wq(struct ib_wq *wq)
+{
+ int err;
+ struct ib_cq *cq = wq->cq;
+ struct ib_pd *pd = wq->pd;
+ struct ib_srq *srq = wq->srq;
+
+ if (!wq->device->destroy_wq)
+ return -ENOSYS;
+
+ if (atomic_read(&wq->usecnt))
+ return -EBUSY;
+
+ err = wq->device->destroy_wq(wq);
+ if (!err) {
+ atomic_dec(&pd->usecnt);
+ atomic_dec(&cq->usecnt);
+ if (srq)
+ atomic_dec(&srq->usecnt);
+ }
+ return err;
+}
+EXPORT_SYMBOL(ib_destroy_wq);
+
+int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ enum ib_wq_attr_mask attr_mask)
+{
+ int err;
+
+ if (!wq->device->modify_wq)
+ return -ENOSYS;
+
+ err = wq->device->modify_wq(wq, wq_attr, attr_mask, NULL);
+ return err;
+}
+EXPORT_SYMBOL(ib_modify_wq);
+
+struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr*
+ init_attr)
+{
+ struct ib_rwq_ind_table *rwq_ind_table;
+ int i;
+ u32 table_size;
+
+ if (!device->create_rwq_ind_table)
+ return ERR_PTR(-ENOSYS);
+
+ table_size = (1 << init_attr->log_ind_tbl_size);
+ rwq_ind_table = device->create_rwq_ind_table(device,
+ init_attr, NULL);
+ if (IS_ERR(rwq_ind_table))
+ return rwq_ind_table;
+
+ rwq_ind_table->ind_tbl = init_attr->ind_tbl;
+ rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size;
+ rwq_ind_table->device = device;
+ rwq_ind_table->pd = init_attr->pd;
+ rwq_ind_table->uobject = NULL;
+ atomic_set(&rwq_ind_table->usecnt, 0);
+
+ for (i = 0; i < table_size; i++)
+ atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt);
+
+ return rwq_ind_table;
+}
+EXPORT_SYMBOL(ib_create_rwq_ind_table);
+
+int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
+{
+ int err, i;
+ u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size);
+ struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl;
+
+ if (atomic_read(&rwq_ind_table->usecnt))
+ return -EBUSY;
+
+ if (!rwq_ind_table->device->destroy_rwq_ind_table)
+ return -ENOSYS;
+
+ err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table);
+ if (!err) {
+ for (i = 0; i < table_size; i++)
+ atomic_dec(&ind_tbl[i]->usecnt);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
+
struct ib_flow *ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)
@@ -1457,8 +1733,11 @@
return ERR_PTR(-ENOSYS);
flow_id = qp->device->create_flow(qp, flow_attr, domain);
- if (!IS_ERR(flow_id))
+ if (!IS_ERR(flow_id)) {
atomic_inc(&qp->usecnt);
+ flow_id->qp = qp;
+ flow_id->uobject = NULL;
+ }
return flow_id;
}
EXPORT_SYMBOL(ib_create_flow);
@@ -1466,13 +1745,8 @@
int ib_destroy_flow(struct ib_flow *flow_id)
{
int err;
- struct ib_qp *qp;
+ struct ib_qp *qp = flow_id->qp;
- if (!flow_id)
- return -EINVAL;
- qp = flow_id->qp;
- if (!qp->device->destroy_flow)
- return -ENOSYS;
err = qp->device->destroy_flow(flow_id);
if (!err)
atomic_dec(&qp->usecnt);
@@ -1504,16 +1778,22 @@
int ib_destroy_dct(struct ib_dct *dct)
{
+ struct ib_srq *srq;
+ struct ib_cq *cq;
+ struct ib_pd *pd;
int err;
if (!dct->device->exp_destroy_dct)
return -ENOSYS;
+ srq = dct->srq;
+ cq = dct->cq;
+ pd = dct->pd;
err = dct->device->exp_destroy_dct(dct);
if (!err) {
- atomic_dec(&dct->srq->usecnt);
- atomic_dec(&dct->cq->usecnt);
- atomic_dec(&dct->pd->usecnt);
+ atomic_dec(&srq->usecnt);
+ atomic_dec(&cq->usecnt);
+ atomic_dec(&pd->usecnt);
}
return err;
@@ -1529,6 +1809,15 @@
}
EXPORT_SYMBOL(ib_query_dct);
+int ib_arm_dct(struct ib_dct *dct)
+{
+ if (!dct->device->exp_arm_dct)
+ return -ENOSYS;
+
+ return dct->device->exp_arm_dct(dct, NULL);
+}
+EXPORT_SYMBOL(ib_arm_dct);
+
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
{
@@ -1536,3 +1825,50 @@
mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
}
EXPORT_SYMBOL(ib_check_mr_status);
+
+int ib_query_mkey(struct ib_mr *mr, u64 mkey_attr_mask,
+ struct ib_mkey_attr *mkey_attr)
+{
+ return mr->device->exp_query_mkey ?
+ mr->device->exp_query_mkey(mr, mkey_attr_mask, mkey_attr) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_query_mkey);
+
+int ib_roce_mode_is_over_ip(struct ib_device *ibdev, int port_num)
+{
+ struct ib_device_attr attr;
+ if ((rdma_port_get_link_layer(ibdev, port_num) == IB_LINK_LAYER_ETHERNET) &&
+ !ib_query_device(ibdev, &attr) &&
+ (attr.device_cap_flags & (IB_DEVICE_ROCE_MODE_1_5 | IB_DEVICE_ROCE_MODE_2)))
+ return 1;
+ return 0;
+}
+EXPORT_SYMBOL(ib_roce_mode_is_over_ip);
+
+struct ib_indir_reg_list *
+ib_alloc_indir_reg_list(struct ib_device *device,
+ unsigned int max_indir_list_len)
+{
+ struct ib_indir_reg_list *indir_list;
+
+ if (!device->alloc_indir_reg_list)
+ return ERR_PTR(-ENOSYS);
+
+ indir_list = device->alloc_indir_reg_list(device,
+ max_indir_list_len);
+ if (!IS_ERR(indir_list)) {
+ indir_list->device = device;
+ indir_list->max_indir_list_len = max_indir_list_len;
+ }
+
+ return indir_list;
+}
+EXPORT_SYMBOL(ib_alloc_indir_reg_list);
+
+void
+ib_free_indir_reg_list(struct ib_indir_reg_list *indir_list)
+{
+ if (indir_list->device->free_indir_reg_list)
+ indir_list->device->free_indir_reg_list(indir_list);
+}
+EXPORT_SYMBOL(ib_free_indir_reg_list);
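For the new WQ and RSS indirection-table verbs added above, a consumer would roughly proceed as sketched below. This is a hedged sketch, not part of the patch: the helper name is hypothetical, IB_WQT_RQ and the omitted queue-sizing fields are assumptions not shown in this hunk, and error unwinding is trimmed.

#include <rdma/ib_verbs.h>

/* hypothetical helper: create 2^log_size receive WQs and group them
 * into one RSS indirection table */
static struct ib_rwq_ind_table *example_setup_rss(struct ib_device *dev,
						  struct ib_pd *pd,
						  struct ib_cq *cq,
						  struct ib_wq **wqs,
						  u32 log_size)
{
	struct ib_wq_init_attr wq_attr = {
		.wq_type = IB_WQT_RQ,	/* assumed receive-queue type */
		.cq	 = cq,		/* sizing fields omitted for brevity */
	};
	struct ib_rwq_ind_table_init_attr tbl_attr = {
		.log_ind_tbl_size = log_size,
		.ind_tbl	  = wqs,
		.pd		  = pd,
	};
	u32 i;

	for (i = 0; i < (1U << log_size); i++) {
		wqs[i] = ib_create_wq(pd, &wq_attr);
		if (IS_ERR(wqs[i]))
			return ERR_PTR(PTR_ERR(wqs[i]));
	}
	/* ib_create_rwq_ind_table() bumps each WQ's usecnt */
	return ib_create_rwq_ind_table(dev, &tbl_attr);
}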
Index: sys/ofed/drivers/infiniband/hw/mthca/mthca_av.c
===================================================================
--- sys/ofed/drivers/infiniband/hw/mthca/mthca_av.c
+++ sys/ofed/drivers/infiniband/hw/mthca/mthca_av.c
@@ -281,7 +281,7 @@
ib_get_cached_gid(&dev->ib_dev,
be32_to_cpu(ah->av->port_pd) >> 24,
ah->av->gid_index % dev->limits.gid_table_len,
- &header->grh.source_gid);
+ &header->grh.source_gid, NULL);
memcpy(header->grh.destination_gid.raw,
ah->av->dgid, 16);
}
Index: sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c
===================================================================
--- sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c
+++ sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1500,7 +1500,7 @@
ib_ud_header_init(256, /* assume a MAD */
1, 0, 0,
mthca_ah_grh_present(to_mah(wr->wr.ud.ah)),
- 0,
+ 0, 0, 0,
&sqp->ud_header);
err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -633,7 +633,7 @@
IB_SA_PATH_REC_NUMB_PATH |
IB_SA_PATH_REC_TRAFFIC_CLASS |
IB_SA_PATH_REC_PKEY,
- 1000, GFP_ATOMIC,
+ 1000, 0, GFP_ATOMIC,
path_rec_completion,
path, &path->query);
if (path->query_id < 0) {
@@ -991,7 +991,7 @@
priv->broadcastaddr[8] = priv->pkey >> 8;
priv->broadcastaddr[9] = priv->pkey & 0xff;
- result = ib_query_gid(hca, port, 0, &priv->local_gid);
+ result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
if (result) {
printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
hca->name, port, result);
Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -480,7 +480,7 @@
return;
}
- if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
+ if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL))
ipoib_warn(priv, "ib_query_gid() failed\n");
else
memcpy(IF_LLADDR(dev) + 4, priv->local_gid.raw, sizeof (union ib_gid));
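The two ipoib call sites above simply pass NULL for the new GID-attribute argument. A caller that does need the attributes would follow the pattern ib_resolve_eth_dmac() uses in verbs.c, roughly as sketched here; device, port and index are placeholders and the snippet is illustrative only.

union ib_gid gid;
struct ib_gid_attr gid_attr;
int ret;

rcu_read_lock();
ret = ib_query_gid(device, port, index, &gid, &gid_attr);
if (!ret && gid_attr.ndev)
	dev_hold(gid_attr.ndev);	/* keep the ndev past the RCU section */
rcu_read_unlock();

if (!ret && gid_attr.ndev) {
	/* ... inspect gid_attr.gid_type, resolve over gid_attr.ndev ... */
	dev_put(gid_attr.ndev);
}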
Index: sys/ofed/include/rdma/ib_verbs.h
===================================================================
--- sys/ofed/include/rdma/ib_verbs.h
+++ sys/ofed/include/rdma/ib_verbs.h
@@ -211,8 +211,8 @@
IB_DEVICE_SIGNATURE_HANDOVER = (1ULL<<32),
IB_DEVICE_ROCE_MODE_1_5 = (1ULL<<34),
IB_DEVICE_ROCE_MODE_2 = (1ULL<<35),
- IB_DEVICE_INDIR_REGISTRATION = (1ULL<<36)
-
+ IB_DEVICE_INDIR_REGISTRATION = (1ULL<<36),
+ IB_DEVICE_SIGNATURE_RESP_PIPE = (1ULL<<37),
};
enum ib_signature_prot_cap {
@@ -229,7 +229,8 @@
enum ib_atomic_cap {
IB_ATOMIC_NONE,
IB_ATOMIC_HCA,
- IB_ATOMIC_GLOB
+ IB_ATOMIC_GLOB,
+ IB_ATOMIC_HCA_REPLY_BE = 0x40 /* HOST is LE and atomic reply is BE */
};
enum ib_cq_create_flags {
@@ -319,7 +320,8 @@
IB_PORT_INIT = 2,
IB_PORT_ARMED = 3,
IB_PORT_ACTIVE = 4,
- IB_PORT_ACTIVE_DEFER = 5
+ IB_PORT_ACTIVE_DEFER = 5,
+ IB_PORT_DUMMY = -1, /* force enum signed */
};
enum ib_port_cap_flags {
@@ -496,6 +498,13 @@
IB_EVENT_QP_LAST_WQE_REACHED,
IB_EVENT_CLIENT_REREGISTER,
IB_EVENT_GID_CHANGE,
+
+ /* New experimental events start here, leaving room
+ * for 14 more standard events, which should be enough.
+ */
+ IB_EXP_EVENT_DCT_KEY_VIOLATION = 32,
+ IB_EXP_EVENT_DCT_ACCESS_ERR,
+ IB_EXP_EVENT_DCT_REQ_ERR,
};
struct ib_event {
@@ -504,6 +513,7 @@
struct ib_cq *cq;
struct ib_qp *qp;
struct ib_srq *srq;
+ struct ib_dct *dct;
u8 port_num;
} element;
enum ib_event_type event;
@@ -722,6 +732,16 @@
};
/**
+ * struct ib_mkey_attr - Memory key attributes
+ *
+ * @max_reg_descriptors: how many MRs we can register with this mkey
+ */
+struct ib_mkey_attr {
+ u32 max_reg_descriptors;
+};
+
+
+/**
* mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
* enum.
* @mult: multiple to convert.
@@ -761,7 +781,8 @@
IB_WC_INV_EEC_STATE_ERR,
IB_WC_FATAL_ERR,
IB_WC_RESP_TIMEOUT_ERR,
- IB_WC_GENERAL_ERR
+ IB_WC_GENERAL_ERR,
+ IB_WC_SIG_PIPELINE_CANCELED,
};
enum ib_wc_opcode {
@@ -783,7 +804,7 @@
*/
IB_WC_RECV = 1 << 7,
IB_WC_RECV_RDMA_WITH_IMM,
- IB_WC_DUMMY = -1 /* force enum signed */
+ IB_WC_DUMMY = -1, /* force enum signed */
};
enum ib_wc_flags {
@@ -892,7 +913,7 @@
IB_QPT_RAW_PACKET = 8,
IB_QPT_XRC_INI = 9,
IB_QPT_XRC_TGT,
- IB_QPT_DC_INI,
+ IB_EXP_QPT_DC_INI = 32,
IB_QPT_MAX,
/* Reserve a range for qp types internal to the low level driver.
* These qp types will not be visible at the IB core layer, so the
@@ -921,6 +942,7 @@
IB_QP_CREATE_USE_GFP_NOIO = 1 << 7,
IB_QP_CREATE_ATOMIC_BE_REPLY = 1 << 8,
IB_QP_CREATE_SIGNATURE_PIPELINE = 1 << 9,
+ IB_QP_CREATE_RX_END_PADDING = 1 << 11,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
@@ -977,6 +999,9 @@
u8 gid_index;
u8 hop_limit;
u32 create_flags;
+ u32 inline_size;
+ void (*event_handler)(struct ib_event *, void *);
+ void *dct_context;
};
struct ib_dct_attr {
@@ -1070,7 +1095,8 @@
IB_QPS_RTS,
IB_QPS_SQD,
IB_QPS_SQE,
- IB_QPS_ERR
+ IB_QPS_ERR,
+ IB_QPS_DUMMY = -1, /* force enum signed */
};
enum ib_mig_state {
@@ -1145,7 +1171,7 @@
IB_WR_RESERVED8,
IB_WR_RESERVED9,
IB_WR_RESERVED10,
- IB_WR_DUMMY = -1 /* force enum signed */
+ IB_WR_DUMMY = -1, /* force enum signed */
};
enum ib_send_flags {
@@ -1154,6 +1180,7 @@
IB_SEND_SOLICITED = (1<<2),
IB_SEND_INLINE = (1<<3),
IB_SEND_IP_CSUM = (1<<4),
+ IB_SEND_SIG_PIPELINED = (1<<5),
/* reserve bits 26-31 for low level drivers' internal use */
IB_SEND_RESERVED_START = (1 << 26),
@@ -1285,7 +1312,8 @@
IB_ACCESS_REMOTE_ATOMIC = (1<<3),
IB_ACCESS_MW_BIND = (1<<4),
IB_ACCESS_ALLOCATE_MR = (1<<5),
- IB_ZERO_BASED = (1<<13)
+ IB_ZERO_BASED = (1<<13),
+ IB_ACCESS_ON_DEMAND = (1<<14),
};
struct ib_phys_buf {
@@ -1474,6 +1502,27 @@
atomic_t usecnt;
};
+enum ib_mp_rq_shifts {
+ IB_MP_RQ_NO_SHIFT = 0,
+ IB_MP_RQ_2BYTES_SHIFT = 1 << 0
+};
+
+struct ib_wq_mp_rq {
+ uint8_t use_mp_rq;
+ enum ib_mp_rq_shifts use_shift;
+ uint8_t single_wqe_log_num_of_strides;
+ uint8_t single_stride_log_num_of_bytes;
+};
+
+enum ib_wq_vlan_offloads {
+ IB_WQ_CVLAN_STRIPPING = (1 << 0),
+};
+
+enum ibv_exp_wq_init_attr_flags {
+ IB_CREATE_WQ_FLAG_RX_END_PADDING = (1ULL << 0),
+ IB_CREATE_WQ_FLAG_RESERVED = (1ULL << 1)
+};
+
struct ib_wq_init_attr {
void *wq_context;
enum ib_wq_type wq_type;
@@ -1482,16 +1531,22 @@
struct ib_cq *cq;
struct ib_srq *srq; /* IB_WQT_SRQ only */
void (*event_handler)(struct ib_event *, void *);
+ struct ib_wq_mp_rq mp_rq;
+ u16 vlan_offloads;
+ u64 flags;
};
enum ib_wq_attr_mask {
IB_WQ_STATE = 1 << 0,
IB_WQ_CUR_STATE = 1 << 1,
+ IB_WQ_VLAN_OFFLOADS = 1 << 2,
+ IB_WQ_MASK_ALL = (IB_WQ_VLAN_OFFLOADS << 1) - 1,
};
struct ib_wq_attr {
enum ib_wq_state wq_state;
enum ib_wq_state curr_wq_state;
+ u16 vlan_offloads;
};
struct ib_rwq_ind_table {
@@ -1539,6 +1594,8 @@
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_srq *srq;
+ void (*event_handler)(struct ib_event *, void *);
+ void *dct_context;
u32 dct_num;
};
@@ -2115,6 +2172,10 @@
struct ib_udata *udata);
int (*exp_destroy_dct)(struct ib_dct *dct);
int (*exp_query_dct)(struct ib_dct *dct, struct ib_dct_attr *attr);
+ int (*exp_arm_dct)(struct ib_dct *dct, struct ib_udata *udata);
+ int (*exp_query_mkey)(struct ib_mr *mr,
+ u64 mkey_attr_mask,
+ struct ib_mkey_attr *mkey_attr);
/**
* exp_rereg_user_mr - Modifies the attributes of an existing memory region.
* Conceptually, this call performs the functions deregister memory region
@@ -3232,6 +3293,11 @@
*/
int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
+/*
+ * ib_arm_dct - Arm a DCT to generate DC key violations
+ * @dct: pointer to the DCT object
+ */
+int ib_arm_dct(struct ib_dct *dct);
static inline void ib_active_speed_enum_to_rate(u8 active_speed,
int *rate,
@@ -3301,6 +3367,16 @@
struct ib_mr_status *mr_status);
/**
+ * ib_query_mkey - Retrieves information about a specific memory
+ * key.
+ * @mr: The memory region to retrieve information about.
+ * @mkey_attr_mask: Which attributes to get
+ * @mkey_attr: The attributes of the specified memory region.
+ */
+int ib_query_mkey(struct ib_mr *mr, u64 mkey_attr_mask,
+ struct ib_mkey_attr *mkey_attr);
+
+/**
* ib_query_values - Query values from the HCA
* @device: The device on which to query the values from
* @q_values - combination of enum ib_values_mask flags to query
Index: sys/ofed/include/rdma/ib_verbs_exp.h
===================================================================
--- sys/ofed/include/rdma/ib_verbs_exp.h
+++ sys/ofed/include/rdma/ib_verbs_exp.h
@@ -134,6 +134,7 @@
IB_EXP_DEVICE_ATTR_RX_HASH = 1ULL << 13,
IB_EXP_DEVICE_ATTR_MAX_WQ_TYPE_RQ = 1ULL << 14,
IB_EXP_DEVICE_ATTR_MAX_DEVICE_CTX = 1ULL << 15,
+ IB_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN = 1ULL << 20,
};
struct ib_exp_device_attr {
@@ -160,6 +161,13 @@
struct ib_exp_rx_hash_caps rx_hash_caps;
uint32_t max_wq_type_rq;
uint32_t max_device_ctx;
+ /*
+ * The alignment of the padding end address.
+ * When RX end-of-packet padding is enabled, the device pads the end
+ * of the RX packet up to the next address that is aligned to the
+ * rx_pad_end_addr_align size.
+ */
+ u16 rx_pad_end_addr_align;
};
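As a concrete reading of the rx_pad_end_addr_align comment above: with end-of-packet padding enabled, a packet of len bytes is padded out to the next multiple of the alignment. A minimal illustration, assuming align is a power of two (the helper name is hypothetical):

static inline size_t padded_rx_len(size_t len, u16 align)
{
	return (len + align - 1) & ~((size_t)align - 1);
}
/* padded_rx_len(61, 64) == 64, padded_rx_len(65, 64) == 128 */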
struct ib_exp_qp_init_attr {
