Page MenuHomeFreeBSD

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/sys/dev/cxgbe/iw_cxgbe/provider.c b/sys/dev/cxgbe/iw_cxgbe/provider.c
index ad47891f1ba0..0cc698a7e38f 100644
--- a/sys/dev/cxgbe/iw_cxgbe/provider.c
+++ b/sys/dev/cxgbe/iw_cxgbe/provider.c
@@ -1,507 +1,526 @@
/*
* Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#define LINUXKPI_PARAM_PREFIX iw_cxgbe_
#include "opt_inet.h"
#ifdef TCP_OFFLOAD
#include <asm/pgtable.h>
#include <linux/page.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include "iw_cxgbe.h"
#include "user.h"
static int fastreg_support = 1;
module_param(fastreg_support, int, 0644);
MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default = 1)");
static int c4iw_modify_port(struct ib_device *ibdev,
u8 port, int port_modify_mask,
struct ib_port_modify *props)
{
return -ENOSYS;
}
static struct ib_ah *c4iw_ah_create(struct ib_pd *pd,
struct ib_ah_attr *ah_attr)
{
return ERR_PTR(-ENOSYS);
}
static int c4iw_ah_destroy(struct ib_ah *ah)
{
return -ENOSYS;
}
static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
return -ENOSYS;
}
static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
return -ENOSYS;
}
static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags,
u8 port_num, struct ib_wc *in_wc,
struct ib_grh *in_grh, struct ib_mad *in_mad,
struct ib_mad *out_mad)
{
return -ENOSYS;
}
static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
{
struct c4iw_dev *rhp = to_c4iw_dev(context->device);
struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
struct c4iw_mm_entry *mm, *tmp;
CTR2(KTR_IW_CXGBE, "%s context %p", __func__, context);
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
kfree(ucontext);
return 0;
}
static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct c4iw_ucontext *context;
struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
CTR2(KTR_IW_CXGBE, "%s ibdev %p", __func__, ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
INIT_LIST_HEAD(&context->mmaps);
spin_lock_init(&context->mmap_lock);
return &context->ibucontext;
}
#ifdef DOT5
static inline pgprot_t t4_pgprot_wc(pgprot_t prot)
{
return pgprot_writecombine(prot);
}
#endif
static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
int len = vma->vm_end - vma->vm_start;
u32 key = vma->vm_pgoff << PAGE_SHIFT;
struct c4iw_rdev *rdev;
int ret = 0;
struct c4iw_mm_entry *mm;
struct c4iw_ucontext *ucontext;
u64 addr, paddr;
u64 va_regs_res = 0, va_udbs_res = 0;
u64 len_regs_res = 0, len_udbs_res = 0;
CTR3(KTR_IW_CXGBE, "%s:1 ctx %p vma %p", __func__, context, vma);
CTR4(KTR_IW_CXGBE, "%s:1a pgoff 0x%lx key 0x%x len %d", __func__,
vma->vm_pgoff, key, len);
if (vma->vm_start & (PAGE_SIZE-1)) {
CTR3(KTR_IW_CXGBE, "%s:2 unaligned vm_start %u vma %p",
__func__, vma->vm_start, vma);
return -EINVAL;
}
rdev = &(to_c4iw_dev(context->device)->rdev);
ucontext = to_c4iw_ucontext(context);
mm = remove_mmap(ucontext, key, len);
if (!mm) {
CTR4(KTR_IW_CXGBE, "%s:3 ucontext %p key %u len %u", __func__,
ucontext, key, len);
return -EINVAL;
}
addr = mm->addr;
kfree(mm);
va_regs_res = (u64)rman_get_virtual(rdev->adap->regs_res);
len_regs_res = (u64)rman_get_size(rdev->adap->regs_res);
va_udbs_res = (u64)rman_get_virtual(rdev->adap->udbs_res);
len_udbs_res = (u64)rman_get_size(rdev->adap->udbs_res);
CTR6(KTR_IW_CXGBE,
"%s:4 addr %p, masync region %p:%p, udb region %p:%p", __func__,
addr, va_regs_res, va_regs_res+len_regs_res, va_udbs_res,
va_udbs_res+len_udbs_res);
if (addr >= va_regs_res && addr < va_regs_res + len_regs_res) {
CTR4(KTR_IW_CXGBE, "%s:5 MA_SYNC addr %p region %p, reglen %u",
__func__, addr, va_regs_res, len_regs_res);
/*
* MA_SYNC register...
*/
paddr = vtophys(addr);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
ret = io_remap_pfn_range(vma, vma->vm_start,
paddr >> PAGE_SHIFT,
len, vma->vm_page_prot);
} else {
if (addr >= va_udbs_res && addr < va_udbs_res + len_udbs_res) {
/*
* Map user DB or OCQP memory...
*/
paddr = vtophys(addr);
CTR4(KTR_IW_CXGBE,
"%s:6 USER DB-GTS addr %p region %p, reglen %u",
__func__, addr, va_udbs_res, len_udbs_res);
#ifdef DOT5
if (!is_t4(rdev->lldi.adapter_type) && map_udb_as_wc)
vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot);
else
#endif
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
ret = io_remap_pfn_range(vma, vma->vm_start,
paddr >> PAGE_SHIFT,
len, vma->vm_page_prot);
} else {
/*
* Map WQ or CQ contig dma memory...
*/
CTR4(KTR_IW_CXGBE,
"%s:7 WQ/CQ addr %p vm_start %u vma %p", __func__,
addr, vma->vm_start, vma);
ret = io_remap_pfn_range(vma, vma->vm_start,
addr >> PAGE_SHIFT,
len, vma->vm_page_prot);
}
}
CTR4(KTR_IW_CXGBE, "%s:8 ctx %p vma %p ret %u", __func__, context, vma,
ret);
return ret;
}
static int
c4iw_deallocate_pd(struct ib_pd *pd)
{
struct c4iw_pd *php = to_c4iw_pd(pd);
struct c4iw_dev *rhp = php->rhp;
CTR3(KTR_IW_CXGBE, "%s: pd %p, pdid 0x%x", __func__, pd, php->pdid);
c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid);
mutex_lock(&rhp->rdev.stats.lock);
rhp->rdev.stats.pd.cur--;
mutex_unlock(&rhp->rdev.stats.lock);
kfree(php);
return (0);
}
static struct ib_pd *
c4iw_allocate_pd(struct ib_device *ibdev, struct ib_ucontext *context,
struct ib_udata *udata)
{
struct c4iw_pd *php;
u32 pdid;
struct c4iw_dev *rhp;
CTR4(KTR_IW_CXGBE, "%s: ibdev %p, context %p, data %p", __func__, ibdev,
context, udata);
rhp = (struct c4iw_dev *) ibdev;
pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table);
if (!pdid)
return ERR_PTR(-EINVAL);
php = kzalloc(sizeof(*php), GFP_KERNEL);
if (!php) {
c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid);
return ERR_PTR(-ENOMEM);
}
php->pdid = pdid;
php->rhp = rhp;
if (context) {
if (ib_copy_to_udata(udata, &php->pdid, sizeof(u32))) {
c4iw_deallocate_pd(&php->ibpd);
return ERR_PTR(-EFAULT);
}
}
mutex_lock(&rhp->rdev.stats.lock);
rhp->rdev.stats.pd.cur++;
if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max)
rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
mutex_unlock(&rhp->rdev.stats.lock);
CTR6(KTR_IW_CXGBE,
"%s: ibdev %p, context %p, data %p, pddid 0x%x, pd %p", __func__,
ibdev, context, udata, pdid, php);
return (&php->ibpd);
}
static int
c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
CTR5(KTR_IW_CXGBE, "%s ibdev %p, port %d, index %d, pkey %p", __func__,
ibdev, port, index, pkey);
*pkey = 0;
return (0);
}
static int
c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid)
{
struct c4iw_dev *dev;
struct port_info *pi;
struct adapter *sc;
CTR5(KTR_IW_CXGBE, "%s ibdev %p, port %d, index %d, gid %p", __func__,
ibdev, port, index, gid);
memset(&gid->raw[0], 0, sizeof(gid->raw));
dev = to_c4iw_dev(ibdev);
sc = dev->rdev.adap;
if (port == 0 || port > sc->params.nports)
return (-EINVAL);
pi = sc->port[port - 1];
memcpy(&gid->raw[0], pi->vi[0].hw_addr, ETHER_ADDR_LEN);
return (0);
}
static int
c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
{
struct c4iw_dev *dev = to_c4iw_dev(ibdev);
struct adapter *sc = dev->rdev.adap;
const int spg_ndesc = sc->params.sge.spg_len / EQ_ESIZE;
CTR3(KTR_IW_CXGBE, "%s ibdev %p, props %p", __func__, ibdev, props);
memset(props, 0, sizeof *props);
memcpy(&props->sys_image_guid, sc->port[0]->vi[0].hw_addr,
ETHER_ADDR_LEN);
props->hw_ver = sc->params.chipid;
props->fw_ver = sc->params.fw_vers;
props->device_cap_flags = dev->device_cap_flags;
props->page_size_cap = T4_PAGESIZE_MASK;
props->vendor_id = pci_get_vendor(sc->dev);
props->vendor_part_id = pci_get_device(sc->dev);
props->max_mr_size = T4_MAX_MR_SIZE;
props->max_qp = sc->vres.qp.size / 2;
props->max_qp_wr = T4_MAX_QP_DEPTH(spg_ndesc);
props->max_sge = T4_MAX_RECV_SGE;
props->max_sge_rd = 1;
props->max_res_rd_atom = sc->params.max_ird_adapter;
props->max_qp_rd_atom = min(sc->params.max_ordird_qp,
c4iw_max_read_depth);
props->max_qp_init_rd_atom = props->max_qp_rd_atom;
props->max_cq = sc->vres.qp.size;
props->max_cqe = T4_MAX_CQ_DEPTH;
props->max_mr = c4iw_num_stags(&dev->rdev);
props->max_pd = T4_MAX_NUM_PD;
props->local_ca_ack_delay = 0;
props->max_fast_reg_page_list_len = T4_MAX_FR_DEPTH;
return (0);
}
/*
* Returns -errno on failure.
*/
static int
c4iw_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props)
{
struct c4iw_dev *dev;
struct adapter *sc;
struct port_info *pi;
struct ifnet *ifp;
CTR4(KTR_IW_CXGBE, "%s ibdev %p, port %d, props %p", __func__, ibdev,
port, props);
dev = to_c4iw_dev(ibdev);
sc = dev->rdev.adap;
if (port > sc->params.nports)
return (-EINVAL);
pi = sc->port[port - 1];
ifp = pi->vi[0].ifp;
memset(props, 0, sizeof(struct ib_port_attr));
props->max_mtu = IB_MTU_4096;
if (ifp->if_mtu >= 4096)
props->active_mtu = IB_MTU_4096;
else if (ifp->if_mtu >= 2048)
props->active_mtu = IB_MTU_2048;
else if (ifp->if_mtu >= 1024)
props->active_mtu = IB_MTU_1024;
else if (ifp->if_mtu >= 512)
props->active_mtu = IB_MTU_512;
else
props->active_mtu = IB_MTU_256;
props->state = pi->link_cfg.link_ok ? IB_PORT_ACTIVE : IB_PORT_DOWN;
props->port_cap_flags =
IB_PORT_CM_SUP |
IB_PORT_SNMP_TUNNEL_SUP |
IB_PORT_REINIT_SUP |
IB_PORT_DEVICE_MGMT_SUP |
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
props->active_width = 2;
props->active_speed = 2;
props->max_msg_sz = -1;
return 0;
}
+static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ return 0;
+}
+
/*
* Returns -errno on error.
*/
int
c4iw_register_device(struct c4iw_dev *dev)
{
struct adapter *sc = dev->rdev.adap;
struct ib_device *ibdev = &dev->ibdev;
struct iw_cm_verbs *iwcm;
int ret;
CTR3(KTR_IW_CXGBE, "%s c4iw_dev %p, adapter %p", __func__, dev, sc);
BUG_ON(!sc->port[0]);
strlcpy(ibdev->name, device_get_nameunit(sc->dev), sizeof(ibdev->name));
memset(&ibdev->node_guid, 0, sizeof(ibdev->node_guid));
memcpy(&ibdev->node_guid, sc->port[0]->vi[0].hw_addr, ETHER_ADDR_LEN);
ibdev->owner = THIS_MODULE;
dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
if (fastreg_support)
dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
ibdev->local_dma_lkey = 0;
ibdev->uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
(1ull << IB_USER_VERBS_CMD_POST_SEND) |
(1ull << IB_USER_VERBS_CMD_POST_RECV);
ibdev->node_type = RDMA_NODE_RNIC;
strlcpy(ibdev->node_desc, C4IW_NODE_DESC, sizeof(ibdev->node_desc));
ibdev->phys_port_cnt = sc->params.nports;
ibdev->num_comp_vectors = 1;
ibdev->dma_device = NULL;
ibdev->query_device = c4iw_query_device;
ibdev->query_port = c4iw_query_port;
ibdev->modify_port = c4iw_modify_port;
ibdev->query_pkey = c4iw_query_pkey;
ibdev->query_gid = c4iw_query_gid;
ibdev->alloc_ucontext = c4iw_alloc_ucontext;
ibdev->dealloc_ucontext = c4iw_dealloc_ucontext;
ibdev->mmap = c4iw_mmap;
ibdev->alloc_pd = c4iw_allocate_pd;
ibdev->dealloc_pd = c4iw_deallocate_pd;
ibdev->create_ah = c4iw_ah_create;
ibdev->destroy_ah = c4iw_ah_destroy;
ibdev->create_qp = c4iw_create_qp;
ibdev->modify_qp = c4iw_ib_modify_qp;
ibdev->query_qp = c4iw_ib_query_qp;
ibdev->destroy_qp = c4iw_destroy_qp;
ibdev->create_cq = c4iw_create_cq;
ibdev->destroy_cq = c4iw_destroy_cq;
ibdev->resize_cq = c4iw_resize_cq;
ibdev->poll_cq = c4iw_poll_cq;
ibdev->get_dma_mr = c4iw_get_dma_mr;
ibdev->reg_phys_mr = c4iw_register_phys_mem;
ibdev->rereg_phys_mr = c4iw_reregister_phys_mem;
ibdev->reg_user_mr = c4iw_reg_user_mr;
ibdev->dereg_mr = c4iw_dereg_mr;
ibdev->alloc_mw = c4iw_alloc_mw;
ibdev->bind_mw = c4iw_bind_mw;
ibdev->dealloc_mw = c4iw_dealloc_mw;
ibdev->alloc_fast_reg_mr = c4iw_alloc_fast_reg_mr;
ibdev->alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl;
ibdev->free_fast_reg_page_list = c4iw_free_fastreg_pbl;
ibdev->attach_mcast = c4iw_multicast_attach;
ibdev->detach_mcast = c4iw_multicast_detach;
ibdev->process_mad = c4iw_process_mad;
ibdev->req_notify_cq = c4iw_arm_cq;
ibdev->post_send = c4iw_post_send;
ibdev->post_recv = c4iw_post_receive;
ibdev->uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
+ ibdev->get_port_immutable = c4iw_port_immutable;
iwcm = kmalloc(sizeof(*iwcm), GFP_KERNEL);
if (iwcm == NULL)
return (-ENOMEM);
iwcm->connect = c4iw_connect;
iwcm->accept = c4iw_accept_cr;
iwcm->reject = c4iw_reject_cr;
iwcm->create_listen_ep = c4iw_create_listen_ep;
iwcm->destroy_listen_ep = c4iw_destroy_listen_ep;
iwcm->newconn = process_newconn;
iwcm->add_ref = c4iw_qp_add_ref;
iwcm->rem_ref = c4iw_qp_rem_ref;
iwcm->get_qp = c4iw_get_qp;
ibdev->iwcm = iwcm;
ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
kfree(iwcm);
return (ret);
}
void
c4iw_unregister_device(struct c4iw_dev *dev)
{
CTR3(KTR_IW_CXGBE, "%s c4iw_dev %p, adapter %p", __func__, dev,
dev->rdev.adap);
ib_unregister_device(&dev->ibdev);
kfree(dev->ibdev.iwcm);
return;
}
#endif
diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c
index 4ceaa6d77b17..298e7e698937 100644
--- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c
+++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c
@@ -1,2895 +1,2928 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define LINUXKPI_PARAM_PREFIX mlx4_
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/if_vlan.h>
#include <linux/fs.h>
#include <net/ipv6.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_user_verbs_exp.h>
#include <rdma/ib_addr.h>
#include <dev/mlx4/driver.h>
#include <dev/mlx4/cmd.h>
#include <linux/sched.h>
#include <linux/page.h>
#include <linux/printk.h>
#include "mlx4_ib.h"
#include "mlx4_exp.h"
#include "user.h"
#include "wc.h"
#define DRV_NAME MLX4_IB_DRV_NAME
#define DRV_VERSION "1.0"
#define MLX4_IB_DRIVER_PROC_DIR_NAME "driver/mlx4_ib"
#define MLX4_IB_MRS_PROC_DIR_NAME "mrs"
#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
#ifdef __linux__
MODULE_VERSION(DRV_VERSION);
#endif
int mlx4_ib_sm_guid_assign = 1;
module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
enum {
MAX_NUM_STR_BITMAP = 1 << 15,
DEFAULT_TBL_VAL = -1
};
static struct mlx4_dbdf2val_lst dev_assign_str = {
.name = "dev_assign_str param",
.num_vals = 1,
.def_val = {DEFAULT_TBL_VAL},
.range = {0, MAX_NUM_STR_BITMAP - 1}
};
module_param_string(dev_assign_str, dev_assign_str.str,
sizeof(dev_assign_str.str), 0444);
MODULE_PARM_DESC(dev_assign_str,
"Map device function numbers to IB device numbers (e.g. '0000:04:00.0-0,002b:1c:0b.a-1,...').\n"
"\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for IB device numbers (e.g. 1).\n"
"\t\tMax supported devices - 32");
static unsigned long *dev_num_str_bitmap;
static spinlock_t dev_num_str_lock;
static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
DRV_VERSION "\n";
struct update_gid_work {
struct work_struct work;
union ib_gid gids[128];
struct mlx4_ib_dev *dev;
int port;
};
struct dev_rec {
int bus;
int dev;
int func;
int nr;
};
static int dr_active;
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, struct net_device*,
unsigned long);
static u8 mlx4_ib_get_dev_port(struct net_device *dev,
struct mlx4_ib_dev *ibdev);
static struct workqueue_struct *wq;
static void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
mad->class_version = 1;
mad->method = IB_MGMT_METHOD_GET;
}
static union ib_gid zgid;
static int check_flow_steering_support(struct mlx4_dev *dev)
{
int eth_num_ports = 0;
int ib_num_ports = 0;
int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
if (dmfs) {
int i;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
eth_num_ports++;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
ib_num_ports++;
dmfs &= (!ib_num_ports ||
(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
(!eth_num_ports ||
(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
if (ib_num_ports && mlx4_is_mfunc(dev)) {
dmfs = 0;
}
}
return dmfs;
}
int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memset(props, 0, sizeof *props);
props->fw_ver = dev->dev->caps.fw_ver;
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK |
IB_DEVICE_SHARED_MR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM)
props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
props->device_cap_flags |= IB_DEVICE_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
props->device_cap_flags |= IB_DEVICE_XRC;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_CROSS_CHANNEL)
props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
if (check_flow_steering_support(dev->dev))
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
props->device_cap_flags |= IB_DEVICE_QPG;
if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
props->device_cap_flags |= IB_DEVICE_UD_RSS;
props->max_rss_tbl_sz = dev->dev->caps.max_rss_tbl_sz;
}
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
else
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
}
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
props->vendor_part_id = dev->dev->pdev->device;
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
props->max_mr_size = ~0ull;
props->page_size_cap = dev->dev->caps.page_size_cap;
props->max_qp = dev->dev->quotas.qp;
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg);
props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
props->max_mr = dev->dev->quotas.mpt;
props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
props->max_srq = dev->dev->quotas.srq;
props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
props->max_srq_sge = dev->dev->caps.max_srq_sge;
props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->masked_atomic_cap = props->atomic_cap;
props->max_pkeys = dev->dev->caps.pkey_table_len[1];
props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
props->hca_core_clock = dev->dev->caps.hca_core_clock;
if (dev->dev->caps.hca_core_clock > 0)
props->comp_mask |= IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK;
if (dev->dev->caps.cq_timestamp) {
props->timestamp_mask = 0xFFFFFFFFFFFF;
props->comp_mask |= IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK;
}
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static enum rdma_link_layer
mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
struct mlx4_dev *dev = to_mdev(device)->dev;
return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
}
static int ib_link_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int ext_active_speed;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
in_mad, out_mad);
if (err)
goto out;
props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
props->lmc = out_mad->data[34] & 0x7;
props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
props->sm_sl = out_mad->data[36] & 0xf;
props->state = out_mad->data[32] & 0xf;
props->phys_state = out_mad->data[33] >> 4;
props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
if (netw_view)
props->gid_tbl_len = out_mad->data[50];
else
props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
props->active_width = out_mad->data[31] & 0xf;
props->active_speed = out_mad->data[35] >> 4;
props->max_mtu = out_mad->data[41] & 0xf;
props->active_mtu = out_mad->data[36] >> 4;
props->subnet_timeout = out_mad->data[51] & 0x1f;
props->max_vl_num = out_mad->data[37] >> 4;
props->init_type_reply = out_mad->data[41] >> 4;
/* Check if extended speeds (EDR/FDR/...) are supported */
if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
ext_active_speed = out_mad->data[62] >> 4;
switch (ext_active_speed) {
case 1:
props->active_speed = IB_SPEED_FDR;
break;
case 2:
props->active_speed = IB_SPEED_EDR;
break;
}
}
/* If reported active speed is QDR, check if is FDR-10 */
if (props->active_speed == IB_SPEED_QDR) {
init_query_mad(in_mad);
in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
NULL, NULL, in_mad, out_mad);
if (err)
goto out;
/* Checking LinkSpeedActive for FDR-10 */
if (out_mad->data[15] & 0x1)
props->active_speed = IB_SPEED_FDR10;
}
/* Avoid wrong speed value returned by FW if the IB link is down. */
if (props->state == IB_PORT_DOWN)
props->active_speed = IB_SPEED_SDR;
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static u8 state_to_phys_state(enum ib_port_state state)
{
return state == IB_PORT_ACTIVE ? 5 : 3;
}
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props, int netw_view)
{
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
struct mlx4_ib_iboe *iboe = &mdev->iboe;
struct net_device *ndev;
enum ib_mtu tmp;
struct mlx4_cmd_mailbox *mailbox;
unsigned long flags;
int err = 0;
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
if (err)
goto out;
props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ?
IB_WIDTH_4X : IB_WIDTH_1X;
props->active_speed = IB_SPEED_QDR;
props->port_cap_flags = IB_PORT_CM_SUP;
if (netw_view)
props->gid_tbl_len = MLX4_ROCE_MAX_GIDS;
else
props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
props->max_msg_sz = mdev->dev->caps.max_msg_sz;
props->pkey_tbl_len = 1;
props->max_mtu = IB_MTU_4096;
props->max_vl_num = 2;
props->state = IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
props->active_mtu = IB_MTU_256;
spin_lock_irqsave(&iboe->lock, flags);
ndev = iboe->netdevs[port - 1];
if (!ndev)
goto out_unlock;
tmp = iboe_get_mtu(ndev->if_mtu);
props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
IB_PORT_ACTIVE : IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
out_unlock:
spin_unlock_irqrestore(&iboe->lock, flags);
out:
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return err;
}
int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props, int netw_view)
{
int err;
memset(props, 0, sizeof *props);
err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
ib_link_query_port(ibdev, port, props, netw_view) :
eth_link_query_port(ibdev, port, props, netw_view);
return err;
}
static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
/* returns host view */
return __mlx4_ib_query_port(ibdev, port, props, 0);
}
int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
int clear = 0;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
if (mlx4_is_mfunc(dev->dev) && netw_view)
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(gid->raw, out_mad->data + 8, 8);
if (mlx4_is_mfunc(dev->dev) && !netw_view) {
if (index) {
/* For any index > 0, return the null guid */
err = 0;
clear = 1;
goto out;
}
}
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
out:
if (clear)
memset(gid->raw + 8, 0, 8);
kfree(in_mad);
kfree(out_mad);
return err;
}
static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
*gid = dev->iboe.gid_table[port - 1][index];
return 0;
}
static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
else
return iboe_query_gid(ibdev, port, index, gid);
}
int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
in_mad, out_mad);
if (err)
goto out;
*pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
}
static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
struct ib_device_modify *props)
{
struct mlx4_cmd_mailbox *mailbox;
unsigned long flags;
if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
return -EOPNOTSUPP;
if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
return 0;
if (mlx4_is_slave(to_mdev(ibdev)->dev))
return -EOPNOTSUPP;
spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
memcpy(ibdev->node_desc, props->node_desc, 64);
spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
/*
* If possible, pass node desc to FW, so it can generate
* a 144 trap. If cmd fails, just ignore.
*/
mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
if (IS_ERR(mailbox))
return 0;
memset(mailbox->buf, 0, 256);
memcpy(mailbox->buf, props->node_desc, 64);
mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
return 0;
}
static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
u32 cap_mask)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
memset(mailbox->buf, 0, 256);
if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
*(u8 *) mailbox->buf = !!reset_qkey_viols << 6;
((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
} else {
((u8 *) mailbox->buf)[3] = !!reset_qkey_viols;
((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
}
err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev->dev, mailbox);
return err;
}
static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
struct ib_port_attr attr;
u32 cap_mask;
int err;
mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
err = mlx4_ib_query_port(ibdev, port, &attr);
if (err)
goto out;
cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
~props->clr_port_cap_mask;
err = mlx4_SET_PORT(to_mdev(ibdev), port,
!!(mask & IB_PORT_RESET_QKEY_CNTR),
cap_mask);
out:
mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
return err;
}
static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_ucontext *context;
struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
struct mlx4_ib_alloc_ucontext_resp resp;
int err;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
resp_v3.qp_tab_size = dev->dev->caps.num_qps;
if (mlx4_wc_enabled()) {
resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
} else {
resp_v3.bf_reg_size = 0;
resp_v3.bf_regs_per_page = 0;
}
} else {
resp.dev_caps = dev->dev->caps.userspace_caps;
resp.qp_tab_size = dev->dev->caps.num_qps;
if (mlx4_wc_enabled()) {
resp.bf_reg_size = dev->dev->caps.bf_reg_size;
resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
} else {
resp.bf_reg_size = 0;
resp.bf_regs_per_page = 0;
}
resp.cqe_size = dev->dev->caps.cqe_size;
}
context = kmalloc(sizeof *context, GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
if (err) {
kfree(context);
return ERR_PTR(err);
}
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
else
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (err) {
mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
kfree(context);
return ERR_PTR(-EFAULT);
}
return &context->ibucontext;
}
static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
kfree(context);
return 0;
}
/* XXX FBSD has no support for get_unmapped_area function */
#if 0
static unsigned long mlx4_ib_get_unmapped_area(struct file *file,
unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
struct mm_struct *mm;
struct vm_area_struct *vma;
unsigned long start_addr;
unsigned long page_size_order;
unsigned long command;
mm = current->mm;
if (addr)
return current->mm->get_unmapped_area(file, addr, len,
pgoff, flags);
/* Last 8 bits hold the command others are data per that command */
command = pgoff & MLX4_IB_MMAP_CMD_MASK;
if (command != MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES)
return current->mm->get_unmapped_area(file, addr, len,
pgoff, flags);
page_size_order = pgoff >> MLX4_IB_MMAP_CMD_BITS;
/* code is based on the huge-pages get_unmapped_area code */
start_addr = mm->free_area_cache;
if (len <= mm->cached_hole_size)
start_addr = TASK_UNMAPPED_BASE;
full_search:
addr = ALIGN(start_addr, 1 << page_size_order);
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (TASK_SIZE - len < addr) {
/*
* Start a new search - just in case we missed
* some holes.
*/
if (start_addr != TASK_UNMAPPED_BASE) {
start_addr = TASK_UNMAPPED_BASE;
goto full_search;
}
return -ENOMEM;
}
if (!vma || addr + len <= vma->vm_start)
return addr;
addr = ALIGN(vma->vm_end, 1 << page_size_order);
}
}
#endif
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
/* Last 8 bits hold the command others are data per that command */
unsigned long command = vma->vm_pgoff & MLX4_IB_MMAP_CMD_MASK;
if (command < MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) {
/* compatibility handling for commands 0 & 1*/
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
}
if (command == MLX4_IB_MMAP_UAR_PAGE) {
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
} else if (command == MLX4_IB_MMAP_BLUE_FLAME_PAGE &&
dev->dev->caps.bf_reg_size != 0) {
vma->vm_page_prot = pgprot_wc(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn +
dev->dev->caps.num_uars,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
} else if (command == MLX4_IB_MMAP_GET_HW_CLOCK) {
struct mlx4_clock_params params;
int ret;
ret = mlx4_get_internal_clock_params(dev->dev, &params);
if (ret)
return ret;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
(pci_resource_start(dev->dev->pdev,
params.bar) + params.offset)
>> PAGE_SHIFT,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
} else
return -EINVAL;
return 0;
}
static int mlx4_ib_ioctl(struct ib_ucontext *context, unsigned int cmd,
unsigned long arg)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
int ret;
int offset;
switch (cmd) {
case MLX4_IOCHWCLOCKOFFSET: {
struct mlx4_clock_params params;
int ret;
ret = mlx4_get_internal_clock_params(dev->dev, &params);
if (!ret) {
offset = params.offset % PAGE_SIZE;
ret = put_user(offset,
(int *)arg);
return sizeof(int);
} else {
return ret;
}
}
default: {
pr_err("mlx4_ib: invalid ioctl %u command with arg %lX\n",
cmd, arg);
return -ENOTTY;
}
}
return ret;
}
static int mlx4_ib_query_values(struct ib_device *device, int q_values,
struct ib_device_values *values)
{
struct mlx4_ib_dev *dev = to_mdev(device);
s64 cycles;
values->values_mask = 0;
if (q_values & IBV_VALUES_HW_CLOCK) {
cycles = mlx4_read_clock(dev->dev);
if (cycles >= 0) {
values->hwclock = cycles & CORE_CLOCK_MASK;
values->values_mask |= IBV_VALUES_HW_CLOCK;
}
q_values &= ~IBV_VALUES_HW_CLOCK;
}
if (q_values)
return -ENOTTY;
return 0;
}
static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx4_ib_pd *pd;
int err;
pd = kmalloc(sizeof *pd, GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
if (err) {
kfree(pd);
return ERR_PTR(err);
}
if (context)
if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
kfree(pd);
return ERR_PTR(-EFAULT);
}
return &pd->ibpd;
}
static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
{
mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
kfree(pd);
return 0;
}
static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx4_ib_xrcd *xrcd;
int err;
if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
return ERR_PTR(-ENOSYS);
xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
if (!xrcd)
return ERR_PTR(-ENOMEM);
err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
if (err)
goto err1;
xrcd->pd = ib_alloc_pd(ibdev);
if (IS_ERR(xrcd->pd)) {
err = PTR_ERR(xrcd->pd);
goto err2;
}
xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0);
if (IS_ERR(xrcd->cq)) {
err = PTR_ERR(xrcd->cq);
goto err3;
}
return &xrcd->ibxrcd;
err3:
ib_dealloc_pd(xrcd->pd);
err2:
mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
err1:
kfree(xrcd);
return ERR_PTR(err);
}
static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
ib_destroy_cq(to_mxrcd(xrcd)->cq);
ib_dealloc_pd(to_mxrcd(xrcd)->pd);
mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
kfree(xrcd);
return 0;
}
static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
{
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_gid_entry *ge;
ge = kzalloc(sizeof *ge, GFP_KERNEL);
if (!ge)
return -ENOMEM;
ge->gid = *gid;
if (mlx4_ib_add_mc(mdev, mqp, gid)) {
ge->port = mqp->port;
ge->added = 1;
}
mutex_lock(&mqp->mutex);
list_add_tail(&ge->list, &mqp->gid_list);
mutex_unlock(&mqp->mutex);
return 0;
}
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid)
{
u8 mac[6];
struct net_device *ndev;
int ret = 0;
if (!mqp->port)
return 0;
spin_lock(&mdev->iboe.lock);
ndev = mdev->iboe.netdevs[mqp->port - 1];
if (ndev)
dev_hold(ndev);
spin_unlock(&mdev->iboe.lock);
if (ndev) {
rdma_get_mcast_mac((struct in6_addr *)gid, mac);
rtnl_lock();
dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac, 6, 0);
ret = 1;
rtnl_unlock();
dev_put(ndev);
}
return ret;
}
struct mlx4_ib_steering {
struct list_head list;
u64 reg_id;
union ib_gid gid;
};
static int parse_flow_attr(struct mlx4_dev *dev,
union ib_flow_spec *ib_spec,
struct _rule_hw *mlx4_spec)
{
enum mlx4_net_trans_rule_id type;
switch (ib_spec->type) {
case IB_FLOW_SPEC_ETH:
type = MLX4_NET_TRANS_RULE_ID_ETH;
memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
ETH_ALEN);
memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
ETH_ALEN);
mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
break;
case IB_FLOW_SPEC_IB:
type = MLX4_NET_TRANS_RULE_ID_IB;
mlx4_spec->ib.l3_qpn = ib_spec->ib.val.l3_type_qpn;
mlx4_spec->ib.qpn_mask = ib_spec->ib.mask.l3_type_qpn;
memcpy(&mlx4_spec->ib.dst_gid, ib_spec->ib.val.dst_gid, 16);
memcpy(&mlx4_spec->ib.dst_gid_msk,
ib_spec->ib.mask.dst_gid, 16);
break;
case IB_FLOW_SPEC_IPV4:
type = MLX4_NET_TRANS_RULE_ID_IPV4;
mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
break;
case IB_FLOW_SPEC_TCP:
case IB_FLOW_SPEC_UDP:
type = ib_spec->type == IB_FLOW_SPEC_TCP ?
MLX4_NET_TRANS_RULE_ID_TCP :
MLX4_NET_TRANS_RULE_ID_UDP;
mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
mlx4_spec->tcp_udp.dst_port_msk =
ib_spec->tcp_udp.mask.dst_port;
mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
mlx4_spec->tcp_udp.src_port_msk =
ib_spec->tcp_udp.mask.src_port;
break;
default:
return -EINVAL;
}
if (map_sw_to_hw_steering_id(dev, type) < 0 ||
hw_rule_sz(dev, type) < 0)
return -EINVAL;
mlx4_spec->id = cpu_to_be16(map_sw_to_hw_steering_id(dev, type));
mlx4_spec->size = hw_rule_sz(dev, type) >> 2;
return hw_rule_sz(dev, type);
}
static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
int domain,
enum mlx4_net_trans_promisc_mode flow_type,
u64 *reg_id)
{
int ret, i;
int size = 0;
void *ib_flow;
struct mlx4_ib_dev *mdev = to_mdev(qp->device);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_net_trans_rule_hw_ctrl *ctrl;
size_t rule_size = sizeof(struct mlx4_net_trans_rule_hw_ctrl) +
(sizeof(struct _rule_hw) * flow_attr->num_of_specs);
static const u16 __mlx4_domain[] = {
[IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
[IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
[IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
[IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
};
if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
pr_err("Invalid priority value.\n");
return -EINVAL;
}
if (domain >= IB_FLOW_DOMAIN_NUM) {
pr_err("Invalid domain value.\n");
return -EINVAL;
}
if (map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
return -EINVAL;
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
memset(mailbox->buf, 0, rule_size);
ctrl = mailbox->buf;
ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
flow_attr->priority);
ctrl->type = map_sw_to_hw_steering_mode(mdev->dev, flow_type);
ctrl->port = flow_attr->port;
ctrl->qpn = cpu_to_be32(qp->qp_num);
if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK)
ctrl->flags = (1 << 3);
ib_flow = flow_attr + 1;
size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
for (i = 0; i < flow_attr->num_of_specs; i++) {
ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
if (ret < 0) {
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return -EINVAL;
}
ib_flow += ((union ib_flow_spec *)ib_flow)->size;
size += ret;
}
ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_NATIVE);
if (ret == -ENOMEM)
pr_err("mcg table is full. Fail to register network rule.\n");
else if (ret == -ENXIO)
pr_err("Device managed flow steering is disabled. Fail to register network rule.\n");
else if (ret)
pr_err("Invalid argumant. Fail to register network rule.\n");
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return ret;
}
static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
{
int err;
err = mlx4_cmd(dev, reg_id, 0, 0,
MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_NATIVE);
if (err)
pr_err("Fail to detach network rule. registration id = 0x%llx\n",
(unsigned long long)reg_id);
return err;
}
static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)
{
int err = 0, i = 0;
struct mlx4_ib_flow *mflow;
enum mlx4_net_trans_promisc_mode type[2];
memset(type, 0, sizeof(type));
mflow = kzalloc(sizeof(struct mlx4_ib_flow), GFP_KERNEL);
if (!mflow) {
err = -ENOMEM;
goto err_free;
}
switch (flow_attr->type) {
case IB_FLOW_ATTR_NORMAL:
type[0] = MLX4_FS_REGULAR;
break;
case IB_FLOW_ATTR_ALL_DEFAULT:
type[0] = MLX4_FS_ALL_DEFAULT;
break;
case IB_FLOW_ATTR_MC_DEFAULT:
type[0] = MLX4_FS_MC_DEFAULT;
break;
case IB_FLOW_ATTR_SNIFFER:
type[0] = MLX4_FS_UC_SNIFFER;
type[1] = MLX4_FS_MC_SNIFFER;
break;
default:
err = -EINVAL;
goto err_free;
}
while (i < ARRAY_SIZE(type) && type[i]) {
err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
&mflow->reg_id[i]);
if (err)
goto err_free;
i++;
}
return &mflow->ibflow;
err_free:
kfree(mflow);
return ERR_PTR(err);
}
static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
{
int err, ret = 0;
int i = 0;
struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
struct mlx4_ib_flow *mflow = to_mflow(flow_id);
while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) {
err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]);
if (err)
ret = err;
i++;
}
kfree(mflow);
return ret;
}
static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
{
struct mlx4_ib_gid_entry *ge;
struct mlx4_ib_gid_entry *tmp;
struct mlx4_ib_gid_entry *ret = NULL;
list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
if (!memcmp(raw, ge->gid.raw, 16)) {
ret = ge;
break;
}
}
return ret;
}
static int del_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
{
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
struct mlx4_ib_gid_entry *ge;
struct net_device *ndev;
u8 mac[6];
mutex_lock(&mqp->mutex);
ge = find_gid_entry(mqp, gid->raw);
if (ge) {
spin_lock(&mdev->iboe.lock);
ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
if (ndev)
dev_hold(ndev);
spin_unlock(&mdev->iboe.lock);
rdma_get_mcast_mac((struct in6_addr *)gid, mac);
if (ndev) {
rtnl_lock();
dev_mc_delete(mdev->iboe.netdevs[ge->port - 1], mac, 6, 0);
rtnl_unlock();
dev_put(ndev);
}
list_del(&ge->list);
kfree(ge);
} else
pr_warn("could not find mgid entry\n");
mutex_unlock(&mqp->mutex);
return ge != NULL ? 0 : -EINVAL;
}
static int _mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid,
int count)
{
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
u64 reg_id = 0;
int record_err = 0;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
struct mlx4_ib_steering *ib_steering;
struct mlx4_ib_steering *tmp;
LIST_HEAD(temp);
mutex_lock(&mqp->mutex);
list_for_each_entry_safe(ib_steering, tmp, &mqp->steering_rules,
list) {
if (memcmp(ib_steering->gid.raw, gid->raw, 16))
continue;
if (--count < 0)
break;
list_del(&ib_steering->list);
list_add(&ib_steering->list, &temp);
}
mutex_unlock(&mqp->mutex);
list_for_each_entry_safe(ib_steering, tmp, &temp,
list) {
reg_id = ib_steering->reg_id;
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp,
gid->raw,
(ibqp->qp_type == IB_QPT_RAW_PACKET) ?
MLX4_PROT_ETH : MLX4_PROT_IB_IPV6,
reg_id);
if (err) {
record_err = record_err ?: err;
continue;
}
err = del_gid_entry(ibqp, gid);
if (err) {
record_err = record_err ?: err;
continue;
}
list_del(&ib_steering->list);
kfree(ib_steering);
}
mutex_lock(&mqp->mutex);
list_for_each_entry(ib_steering, &temp, list) {
list_add(&ib_steering->list, &mqp->steering_rules);
}
mutex_unlock(&mqp->mutex);
if (count) {
pr_warn("Couldn't release all reg_ids for mgid. Steering rule is left attached\n");
return -EINVAL;
}
} else {
if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_B0 &&
ibqp->qp_type == IB_QPT_RAW_PACKET)
gid->raw[5] = mqp->port;
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
(ibqp->qp_type == IB_QPT_RAW_PACKET) ?
MLX4_PROT_ETH : MLX4_PROT_IB_IPV6,
reg_id);
if (err)
return err;
err = del_gid_entry(ibqp, gid);
if (err)
return err;
}
return record_err;
}
static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
int count = (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) ?
mdev->dev->caps.num_ports : 1;
return _mlx4_ib_mcg_detach(ibqp, gid, lid, count);
}
static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err = -ENODEV;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
DECLARE_BITMAP(ports, MLX4_MAX_PORTS);
int i = 0;
if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_B0 &&
ibqp->qp_type == IB_QPT_RAW_PACKET)
gid->raw[5] = mqp->port;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
bitmap_fill(ports, mdev->dev->caps.num_ports);
} else {
if (mqp->port <= mdev->dev->caps.num_ports) {
bitmap_zero(ports, mdev->dev->caps.num_ports);
set_bit(0, ports);
} else {
return -EINVAL;
}
}
for (; i < mdev->dev->caps.num_ports; i++) {
u64 reg_id;
struct mlx4_ib_steering *ib_steering = NULL;
if (!test_bit(i, ports))
continue;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
if (!ib_steering)
goto err_add;
}
err = mlx4_multicast_attach(mdev->dev, &mqp->mqp,
gid->raw, i + 1,
!!(mqp->flags &
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
(ibqp->qp_type == IB_QPT_RAW_PACKET) ?
MLX4_PROT_ETH : MLX4_PROT_IB_IPV6,
&reg_id);
if (err) {
kfree(ib_steering);
goto err_add;
}
err = add_gid_entry(ibqp, gid);
if (err) {
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
MLX4_PROT_IB_IPV6, reg_id);
kfree(ib_steering);
goto err_add;
}
if (ib_steering) {
memcpy(ib_steering->gid.raw, gid->raw, 16);
mutex_lock(&mqp->mutex);
list_add(&ib_steering->list, &mqp->steering_rules);
mutex_unlock(&mqp->mutex);
ib_steering->reg_id = reg_id;
}
}
return 0;
err_add:
if (i > 0)
_mlx4_ib_mcg_detach(ibqp, gid, lid, i);
return err;
}
static int init_node_data(struct mlx4_ib_dev *dev)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
if (mlx4_is_master(dev->dev))
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "MT%d\n", dev->dev->pdev->device);
}
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32),
(int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
(int) dev->dev->caps.fw_ver & 0xffff);
}
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->dev->rev_id);
}
static ssize_t show_board(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
dev->dev->board_id);
}
static ssize_t show_vsd(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
ssize_t len = MLX4_VSD_LEN;
if (dev->dev->vsd_vendor_id == PCI_VENDOR_ID_MELLANOX)
len = sprintf(buf, "%.*s\n", MLX4_VSD_LEN, dev->dev->vsd);
else
memcpy(buf, dev->dev->vsd, MLX4_VSD_LEN);
return len;
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static DEVICE_ATTR(vsd, S_IRUGO, show_vsd, NULL);
static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id,
&dev_attr_vsd
};
static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev, u8 port)
{
memcpy(eui, IF_LLADDR(dev), 3);
memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
if (vlan_id < 0x1000) {
eui[3] = vlan_id >> 8;
eui[4] = vlan_id & 0xff;
} else {
eui[3] = 0xff;
eui[4] = 0xfe;
}
eui[0] ^= 2;
}
static void update_gids_task(struct work_struct *work)
{
struct update_gid_work *gw = container_of(work, struct update_gid_work, work);
struct mlx4_cmd_mailbox *mailbox;
union ib_gid *gids;
int err;
struct mlx4_dev *dev = gw->dev->dev;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
goto free;
}
gids = mailbox->buf;
memcpy(gids, gw->gids, sizeof gw->gids);
if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) ==
IB_LINK_LAYER_ETHERNET) {
err = mlx4_cmd(dev, mailbox->dma,
MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
if (err)
pr_warn("set port command failed\n");
else
mlx4_ib_dispatch_event(gw->dev, gw->port,
IB_EVENT_GID_CHANGE);
}
mlx4_free_cmd_mailbox(dev, mailbox);
free:
kfree(gw);
}
static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
{
struct mlx4_ib_dev *ibdev = to_mdev(device);
return mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
}
static void reset_gids_task(struct work_struct *work)
{
struct update_gid_work *gw =
container_of(work, struct update_gid_work, work);
struct mlx4_cmd_mailbox *mailbox;
union ib_gid *gids;
int err;
struct mlx4_dev *dev = gw->dev->dev;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
pr_warn("reset gid table failed\n");
goto free;
}
gids = mailbox->buf;
memcpy(gids, gw->gids, sizeof(gw->gids));
if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET &&
dev->caps.num_ports > 0) {
err = mlx4_cmd(dev, mailbox->dma,
MLX4_SET_PORT_GID_TABLE << 8 | 1,
1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
if (err)
pr_warn("set port 1 command failed\n");
}
if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, 2) ==
IB_LINK_LAYER_ETHERNET &&
dev->caps.num_ports > 1) {
err = mlx4_cmd(dev, mailbox->dma,
MLX4_SET_PORT_GID_TABLE << 8 | 2,
1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
if (err)
pr_warn("set port 2 command failed\n");
}
mlx4_free_cmd_mailbox(dev, mailbox);
free:
kfree(gw);
}
static int update_gid_table(struct mlx4_ib_dev *dev, int port,
union ib_gid *gid, int clear, int default_gid)
{
struct update_gid_work *work;
int i;
int need_update = 0;
int free = -1;
int found = -1;
int max_gids;
int start_index = !default_gid;
max_gids = dev->dev->caps.gid_table_len[port];
for (i = start_index; i < max_gids; ++i) {
if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
sizeof(*gid)))
found = i;
if (clear) {
if (found >= 0) {
need_update = 1;
dev->iboe.gid_table[port - 1][found] = zgid;
break;
}
} else {
if (found >= 0)
break;
if (free < 0 &&
!memcmp(&dev->iboe.gid_table[port - 1][i],
&zgid, sizeof(*gid)))
free = i;
}
}
if (found == -1 && !clear && free < 0) {
pr_err("GID table of port %d is full. Can't add "GID_PRINT_FMT"\n",
port, GID_PRINT_ARGS(gid));
return -ENOMEM;
}
if (found == -1 && clear) {
pr_err(GID_PRINT_FMT" is not in GID table of port %d\n", GID_PRINT_ARGS(gid), port);
return -EINVAL;
}
if (found == -1 && !clear && free >= 0) {
dev->iboe.gid_table[port - 1][free] = *gid;
need_update = 1;
}
if (!need_update)
return 0;
work = kzalloc(sizeof *work, GFP_ATOMIC);
if (!work)
return -ENOMEM;
memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
INIT_WORK(&work->work, update_gids_task);
work->port = port;
work->dev = dev;
queue_work(wq, &work->work);
return 0;
}
static int reset_gid_table(struct mlx4_ib_dev *dev)
{
struct update_gid_work *work;
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return -ENOMEM;
memset(dev->iboe.gid_table, 0, sizeof(dev->iboe.gid_table));
memset(work->gids, 0, sizeof(work->gids));
INIT_WORK(&work->work, reset_gids_task);
work->dev = dev;
queue_work(wq, &work->work);
return 0;
}
/* XXX BOND Related - stub (no support for these flags in FBSD)*/
static inline int netif_is_bond_master(struct net_device *dev)
{
#if 0
return (dev->flags & IFF_MASTER) && (dev->priv_flags & IFF_BONDING);
#endif
return 0;
}
static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid, u8 port)
{
gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev, port);
}
static u8 mlx4_ib_get_dev_port(struct net_device *dev, struct mlx4_ib_dev *ibdev)
{
u8 port = 0;
struct mlx4_ib_iboe *iboe;
struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
rdma_vlan_dev_real_dev(dev) : dev;
iboe = &ibdev->iboe;
for (port = 1; port <= MLX4_MAX_PORTS; ++port)
if ((netif_is_bond_master(real_dev) && (real_dev == iboe->masters[port - 1])) ||
(!netif_is_bond_master(real_dev) && (real_dev == iboe->netdevs[port - 1])))
break;
return port > MLX4_MAX_PORTS ? 0 : port;
}
static void mlx4_ib_get_dev_addr(struct net_device *dev, struct mlx4_ib_dev *ibdev, u8 port)
{
struct ifaddr *ifa;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct inet6_dev *in6_dev;
union ib_gid *pgid;
struct inet6_ifaddr *ifp;
#endif
union ib_gid gid;
if ((port == 0) || (port > MLX4_MAX_PORTS))
return;
/* IPv4 gids */
TAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) {
if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET){
ipv6_addr_set_v4mapped(
((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr,
(struct in6_addr *)&gid);
update_gid_table(ibdev, port, &gid, 0, 0);
}
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/* IPv6 gids */
in6_dev = in6_dev_get(dev);
if (in6_dev) {
read_lock_bh(&in6_dev->lock);
list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
pgid = (union ib_gid *)&ifp->addr;
update_gid_table(ibdev, port, pgid, 0, 0);
}
read_unlock_bh(&in6_dev->lock);
in6_dev_put(in6_dev);
}
#endif
}
static void mlx4_set_default_gid(struct mlx4_ib_dev *ibdev,
struct net_device *dev, u8 port)
{
union ib_gid gid;
mlx4_make_default_gid(dev, &gid, port);
update_gid_table(ibdev, port, &gid, 0, 1);
}
static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
{
struct net_device *dev;
if (reset_gid_table(ibdev))
return -1;
IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(dev, &V_ifnet, if_link) {
u8 port = mlx4_ib_get_dev_port(dev, ibdev);
if (port) {
if (!rdma_vlan_dev_real_dev(dev) &&
!netif_is_bond_master(dev))
mlx4_set_default_gid(ibdev, dev, port);
mlx4_ib_get_dev_addr(dev, ibdev, port);
}
}
IFNET_RUNLOCK_NOSLEEP();
return 0;
}
static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
struct net_device *dev, unsigned long event)
{
struct mlx4_ib_iboe *iboe;
int port;
int init = 0;
unsigned long flags;
iboe = &ibdev->iboe;
spin_lock_irqsave(&iboe->lock, flags);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
struct net_device *old_netdev = iboe->netdevs[port - 1];
/* XXX BOND related */
#if 0
struct net_device *old_master = iboe->masters[port - 1];
#endif
iboe->masters[port - 1] = NULL;
iboe->netdevs[port - 1] =
mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
if (old_netdev != iboe->netdevs[port - 1])
init = 1;
if (dev == iboe->netdevs[port - 1] &&
event == NETDEV_CHANGEADDR)
init = 1;
/* XXX BOND related */
#if 0
if (iboe->netdevs[port - 1] && netif_is_bond_slave(iboe->netdevs[port - 1]))
iboe->masters[port - 1] = iboe->netdevs[port - 1]->master;
/* if bonding is used it is possible that we add it to masters only after
IP address is assigned to the net bonding interface */
if (old_master != iboe->masters[port - 1])
init = 1;
#endif
}
spin_unlock_irqrestore(&iboe->lock, flags);
if (init)
if (mlx4_ib_init_gid_table(ibdev))
pr_warn("Fail to reset gid table\n");
}
static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = ptr;
struct mlx4_ib_dev *ibdev;
ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
mlx4_ib_scan_netdevs(ibdev, dev, event);
return NOTIFY_DONE;
}
/* This function initializes the gid table only if the event_netdev real device is an iboe
* device, will be invoked by the inet/inet6 events */
static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *event_netdev = ptr;
struct mlx4_ib_dev *ibdev;
struct mlx4_ib_iboe *ibdev_iboe;
int port = 0;
ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
rdma_vlan_dev_real_dev(event_netdev) :
event_netdev;
ibdev_iboe = &ibdev->iboe;
port = mlx4_ib_get_dev_port(real_dev, ibdev);
/* Perform init_gid_table if the event real_dev is the net_device which represents this port,
* otherwise this event is not related and would be ignored.*/
if(port && (real_dev == ibdev_iboe->netdevs[port - 1]))
if (mlx4_ib_init_gid_table(ibdev))
pr_warn("Fail to reset gid table\n");
return NOTIFY_DONE;
}
static void init_pkeys(struct mlx4_ib_dev *ibdev)
{
int port;
int slave;
int i;
if (mlx4_is_master(ibdev->dev)) {
for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
for (i = 0;
i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
++i) {
ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
/* master has the identity virt2phys pkey mapping */
(slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
}
}
}
/* initialize pkey cache */
for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
for (i = 0;
i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
++i)
ibdev->pkeys.phys_pkey_cache[port-1][i] =
(i) ? 0 : 0xFFFF;
}
}
}
static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
{
char name[32];
int eq_per_port = 0;
int added_eqs = 0;
int total_eqs = 0;
int i, j, eq;
/* Legacy mode or comp_pool is not large enough */
if (dev->caps.comp_pool == 0 ||
dev->caps.num_ports > dev->caps.comp_pool)
return;
eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
dev->caps.num_ports);
/* Init eq table */
added_eqs = 0;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
added_eqs += eq_per_port;
total_eqs = dev->caps.num_comp_vectors + added_eqs;
ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL);
if (!ibdev->eq_table)
return;
ibdev->eq_added = added_eqs;
eq = 0;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
for (j = 0; j < eq_per_port; j++) {
sprintf(name, "mlx4-ib-%d-%d@%d:%d:%d:%d", i, j,
pci_get_domain(dev->pdev->dev.bsddev),
pci_get_bus(dev->pdev->dev.bsddev),
PCI_SLOT(dev->pdev->devfn),
PCI_FUNC(dev->pdev->devfn));
/* Set IRQ for specific name (per ring) */
if (mlx4_assign_eq(dev, name,
&ibdev->eq_table[eq])) {
/* Use legacy (same as mlx4_en driver) */
pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
ibdev->eq_table[eq] =
(eq % dev->caps.num_comp_vectors);
}
eq++;
}
}
/* Fill the reset of the vector with legacy EQ */
for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++)
ibdev->eq_table[eq++] = i;
/* Advertise the new number of EQs to clients */
ibdev->ib_dev.num_comp_vectors = total_eqs;
}
static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
{
int i;
/* no additional eqs were added */
if (!ibdev->eq_table)
return;
/* Reset the advertised EQ number */
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
/* Free only the added eqs */
for (i = 0; i < ibdev->eq_added; i++) {
/* Don't free legacy eqs if used */
if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors)
continue;
mlx4_release_eq(dev, ibdev->eq_table[i]);
}
kfree(ibdev->eq_table);
}
/*
* create show function and a device_attribute struct pointing to
* the function for _name
*/
#define DEVICE_DIAG_RPRT_ATTR(_name, _offset, _op_mod) \
static ssize_t show_rprt_##_name(struct device *dev, \
struct device_attribute *attr, \
char *buf){ \
return show_diag_rprt(dev, buf, _offset, _op_mod); \
} \
static DEVICE_ATTR(_name, S_IRUGO, show_rprt_##_name, NULL);
#define MLX4_DIAG_RPRT_CLEAR_DIAGS 3
static size_t show_diag_rprt(struct device *device, char *buf,
u32 offset, u8 op_modifier)
{
size_t ret;
u32 counter_offset = offset;
u32 diag_counter = 0;
struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
ib_dev.dev);
ret = mlx4_query_diag_counters(dev->dev, 1, op_modifier,
&counter_offset, &diag_counter);
if (ret)
return ret;
return sprintf(buf, "%d\n", diag_counter);
}
static ssize_t clear_diag_counters(struct device *device,
struct device_attribute *attr,
const char *buf, size_t length)
{
size_t ret;
struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
ib_dev.dev);
ret = mlx4_query_diag_counters(dev->dev, 0, MLX4_DIAG_RPRT_CLEAR_DIAGS,
NULL, NULL);
if (ret)
return ret;
return length;
}
DEVICE_DIAG_RPRT_ATTR(rq_num_lle , 0x00, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_lle , 0x04, 2);
DEVICE_DIAG_RPRT_ATTR(rq_num_lqpoe , 0x08, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_lqpoe , 0x0C, 2);
DEVICE_DIAG_RPRT_ATTR(rq_num_lpe , 0x18, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_lpe , 0x1C, 2);
DEVICE_DIAG_RPRT_ATTR(rq_num_wrfe , 0x20, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_wrfe , 0x24, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_mwbe , 0x2C, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_bre , 0x34, 2);
DEVICE_DIAG_RPRT_ATTR(rq_num_lae , 0x38, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_rire , 0x44, 2);
DEVICE_DIAG_RPRT_ATTR(rq_num_rire , 0x48, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_rae , 0x4C, 2);
DEVICE_DIAG_RPRT_ATTR(rq_num_rae , 0x50, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_roe , 0x54, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_tree , 0x5C, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_rree , 0x64, 2);
DEVICE_DIAG_RPRT_ATTR(rq_num_rnr , 0x68, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_rnr , 0x6C, 2);
DEVICE_DIAG_RPRT_ATTR(rq_num_oos , 0x100, 2);
DEVICE_DIAG_RPRT_ATTR(sq_num_oos , 0x104, 2);
DEVICE_DIAG_RPRT_ATTR(rq_num_mce , 0x108, 2);
DEVICE_DIAG_RPRT_ATTR(rq_num_udsdprd , 0x118, 2);
DEVICE_DIAG_RPRT_ATTR(rq_num_ucsdprd , 0x120, 2);
DEVICE_DIAG_RPRT_ATTR(num_cqovf , 0x1A0, 2);
DEVICE_DIAG_RPRT_ATTR(num_eqovf , 0x1A4, 2);
DEVICE_DIAG_RPRT_ATTR(num_baddb , 0x1A8, 2);
static DEVICE_ATTR(clear_diag, S_IWUSR, NULL, clear_diag_counters);
static struct attribute *diag_rprt_attrs[] = {
&dev_attr_rq_num_lle.attr,
&dev_attr_sq_num_lle.attr,
&dev_attr_rq_num_lqpoe.attr,
&dev_attr_sq_num_lqpoe.attr,
&dev_attr_rq_num_lpe.attr,
&dev_attr_sq_num_lpe.attr,
&dev_attr_rq_num_wrfe.attr,
&dev_attr_sq_num_wrfe.attr,
&dev_attr_sq_num_mwbe.attr,
&dev_attr_sq_num_bre.attr,
&dev_attr_rq_num_lae.attr,
&dev_attr_sq_num_rire.attr,
&dev_attr_rq_num_rire.attr,
&dev_attr_sq_num_rae.attr,
&dev_attr_rq_num_rae.attr,
&dev_attr_sq_num_roe.attr,
&dev_attr_sq_num_tree.attr,
&dev_attr_sq_num_rree.attr,
&dev_attr_rq_num_rnr.attr,
&dev_attr_sq_num_rnr.attr,
&dev_attr_rq_num_oos.attr,
&dev_attr_sq_num_oos.attr,
&dev_attr_rq_num_mce.attr,
&dev_attr_rq_num_udsdprd.attr,
&dev_attr_rq_num_ucsdprd.attr,
&dev_attr_num_cqovf.attr,
&dev_attr_num_eqovf.attr,
&dev_attr_num_baddb.attr,
&dev_attr_clear_diag.attr,
NULL
};
static struct attribute_group diag_counters_group = {
.name = "diag_counters",
.attrs = diag_rprt_attrs
};
static void init_dev_assign(void)
{
int i = 1;
spin_lock_init(&dev_num_str_lock);
if (mlx4_fill_dbdf2val_tbl(&dev_assign_str))
return;
dev_num_str_bitmap =
kmalloc(BITS_TO_LONGS(MAX_NUM_STR_BITMAP) * sizeof(long),
GFP_KERNEL);
if (!dev_num_str_bitmap) {
pr_warn("bitmap alloc failed -- cannot apply dev_assign_str parameter\n");
return;
}
bitmap_zero(dev_num_str_bitmap, MAX_NUM_STR_BITMAP);
while ((i < MLX4_DEVS_TBL_SIZE) && (dev_assign_str.tbl[i].dbdf !=
MLX4_ENDOF_TBL)) {
if (bitmap_allocate_region(dev_num_str_bitmap,
dev_assign_str.tbl[i].val[0], 0))
goto err;
i++;
}
dr_active = 1;
return;
err:
kfree(dev_num_str_bitmap);
dev_num_str_bitmap = NULL;
pr_warn("mlx4_ib: The value of 'dev_assign_str' parameter "
"is incorrect. The parameter value is discarded!");
}
static int mlx4_ib_dev_idx(struct mlx4_dev *dev)
{
int i, val;
if (!dr_active)
return -1;
if (!dev)
return -1;
if (mlx4_get_val(dev_assign_str.tbl, dev->pdev, 0, &val))
return -1;
if (val != DEFAULT_TBL_VAL) {
dev->flags |= MLX4_FLAG_DEV_NUM_STR;
return val;
}
spin_lock(&dev_num_str_lock);
i = bitmap_find_free_region(dev_num_str_bitmap, MAX_NUM_STR_BITMAP, 0);
spin_unlock(&dev_num_str_lock);
if (i >= 0)
return i;
return -1;
}
+static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+ int err;
+
+ if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ } else {
+ if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCEV2)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
+ if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ }
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ return 0;
+}
+
static void *mlx4_ib_add(struct mlx4_dev *dev)
{
struct mlx4_ib_dev *ibdev;
int num_ports = 0;
int i, j;
int err;
struct mlx4_ib_iboe *iboe;
int dev_idx;
pr_info_once("%s", mlx4_ib_version);
mlx4_foreach_ib_transport_port(i, dev)
num_ports++;
/* No point in registering a device with no ports... */
if (num_ports == 0)
return NULL;
ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
if (!ibdev) {
dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
return NULL;
}
iboe = &ibdev->iboe;
if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
goto err_dealloc;
if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
goto err_pd;
ibdev->priv_uar.map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT,
PAGE_SIZE);
if (!ibdev->priv_uar.map)
goto err_uar;
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
ibdev->dev = dev;
dev_idx = mlx4_ib_dev_idx(dev);
if (dev_idx >= 0)
sprintf(ibdev->ib_dev.name, "mlx4_%d", dev_idx);
else
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
ibdev->num_ports = num_ports;
ibdev->ib_dev.phys_port_cnt = ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dma_device = &dev->pdev->dev;
if (dev->caps.userspace_caps)
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
else
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
ibdev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
(1ull << IB_USER_VERBS_CMD_OPEN_QP);
ibdev->ib_dev.query_device = mlx4_ib_query_device;
ibdev->ib_dev.query_port = mlx4_ib_query_port;
ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer;
ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
ibdev->ib_dev.mmap = mlx4_ib_mmap;
/* XXX FBSD has no support for get_unmapped_area function */
#if 0
ibdev->ib_dev.get_unmapped_area = mlx4_ib_get_unmapped_area;
#endif
ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
ibdev->ib_dev.query_ah = mlx4_ib_query_ah;
ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
ibdev->ib_dev.query_srq = mlx4_ib_query_srq;
ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
ibdev->ib_dev.post_send = mlx4_ib_post_send;
ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq;
ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq;
ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq;
ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq;
ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
+ ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev;
ibdev->ib_dev.ioctl = mlx4_ib_ioctl;
ibdev->ib_dev.query_values = mlx4_ib_query_values;
if (!mlx4_is_slave(ibdev->dev)) {
ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
}
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW) {
ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
ibdev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
}
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
ibdev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
/*
* Set experimental data
*/
ibdev->ib_dev.uverbs_exp_cmd_mask =
(1ull << IB_USER_VERBS_EXP_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_EXP_CMD_MODIFY_CQ) |
(1ull << IB_USER_VERBS_EXP_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_EXP_CMD_CREATE_CQ);
ibdev->ib_dev.exp_create_qp = mlx4_ib_exp_create_qp;
ibdev->ib_dev.exp_query_device = mlx4_ib_exp_query_device;
if (check_flow_steering_support(dev)) {
ibdev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
} else {
pr_debug("Device managed flow steering is unavailable for this configuration.\n");
}
/*
* End of experimental data
*/
mlx4_ib_alloc_eqs(dev, ibdev);
spin_lock_init(&iboe->lock);
if (init_node_data(ibdev))
goto err_map;
for (i = 0; i < ibdev->num_ports; ++i) {
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
IB_LINK_LAYER_ETHERNET) {
if (mlx4_is_slave(dev)) {
ibdev->counters[i].status = mlx4_counter_alloc(ibdev->dev,
i + 1,
&ibdev->counters[i].counter_index);
} else {/* allocating the PF IB default counter indices reserved in mlx4_init_counters_table */
ibdev->counters[i].counter_index = ((i + 1) << 1) - 1;
ibdev->counters[i].status = 0;
}
dev_info(&dev->pdev->dev,
"%s: allocated counter index %d for port %d\n",
__func__, ibdev->counters[i].counter_index, i+1);
} else {
ibdev->counters[i].counter_index = MLX4_SINK_COUNTER_INDEX;
ibdev->counters[i].status = -ENOSPC;
}
}
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
!mlx4_is_mfunc(dev)) {
ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
MLX4_IB_UC_STEER_QPN_ALIGN, &ibdev->steer_qpn_base, 0);
if (err)
goto err_counter;
ibdev->ib_uc_qpns_bitmap =
kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
sizeof(long),
GFP_KERNEL);
if (!ibdev->ib_uc_qpns_bitmap) {
dev_err(&dev->pdev->dev, "bit map alloc failed\n");
goto err_steer_qp_release;
}
bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_base + ibdev->steer_qpn_count - 1);
if (err)
goto err_steer_free_bitmap;
}
if (ib_register_device(&ibdev->ib_dev, NULL))
goto err_steer_free_bitmap;
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
if (!iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(&iboe->nb);
if (err) {
iboe->nb.notifier_call = NULL;
goto err_notify;
}
}
if (!iboe->nb_inet.notifier_call) {
iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
err = register_inetaddr_notifier(&iboe->nb_inet);
if (err) {
iboe->nb_inet.notifier_call = NULL;
goto err_notify;
}
}
mlx4_ib_scan_netdevs(ibdev, NULL, 0);
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
if (device_create_file(&ibdev->ib_dev.dev,
mlx4_class_attributes[j]))
goto err_notify;
}
if (sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group))
goto err_notify;
ibdev->ib_active = true;
if (mlx4_is_mfunc(ibdev->dev))
init_pkeys(ibdev);
/* create paravirt contexts for any VFs which are active */
if (mlx4_is_master(ibdev->dev)) {
for (j = 0; j < MLX4_MFUNC_MAX; j++) {
if (j == mlx4_master_func_num(ibdev->dev))
continue;
if (mlx4_is_slave_active(ibdev->dev, j))
do_slave_init(ibdev, j, 1);
}
}
return ibdev;
err_notify:
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
device_remove_file(&ibdev->ib_dev.dev,
mlx4_class_attributes[j]);
}
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
if (ibdev->iboe.nb_inet.notifier_call) {
if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb_inet.notifier_call = NULL;
}
flush_workqueue(wq);
mlx4_ib_close_sriov(ibdev);
err_mad:
mlx4_ib_mad_cleanup(ibdev);
err_reg:
ib_unregister_device(&ibdev->ib_dev);
err_steer_free_bitmap:
kfree(ibdev->ib_uc_qpns_bitmap);
err_steer_qp_release:
if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED)
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
err_counter:
for (; i; --i) {
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i) ==
IB_LINK_LAYER_ETHERNET) {
mlx4_counter_free(ibdev->dev,
i,
ibdev->counters[i - 1].counter_index);
}
}
err_map:
iounmap(ibdev->priv_uar.map);
mlx4_ib_free_eqs(dev, ibdev);
err_uar:
mlx4_uar_free(dev, &ibdev->priv_uar);
err_pd:
mlx4_pd_free(dev, ibdev->priv_pdn);
err_dealloc:
ib_dealloc_device(&ibdev->ib_dev);
return NULL;
}
int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
{
int offset;
WARN_ON(!dev->ib_uc_qpns_bitmap);
offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
dev->steer_qpn_count,
get_count_order(count));
if (offset < 0)
return offset;
*qpn = dev->steer_qpn_base + offset;
return 0;
}
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
{
if (!qpn ||
dev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED)
return;
BUG_ON(qpn < dev->steer_qpn_base);
bitmap_release_region(dev->ib_uc_qpns_bitmap,
qpn - dev->steer_qpn_base, get_count_order(count));
}
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach)
{
int err;
size_t flow_size;
struct ib_flow_attr *flow = NULL;
struct ib_flow_spec_ib *ib_spec;
if (is_attach) {
flow_size = sizeof(struct ib_flow_attr) +
sizeof(struct ib_flow_spec_ib);
flow = kzalloc(flow_size, GFP_KERNEL);
if (!flow)
return -ENOMEM;
flow->port = mqp->port;
flow->num_of_specs = 1;
flow->size = flow_size;
ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
ib_spec->type = IB_FLOW_SPEC_IB;
ib_spec->size = sizeof(struct ib_flow_spec_ib);
ib_spec->val.l3_type_qpn = mqp->ibqp.qp_num;
ib_spec->mask.l3_type_qpn = MLX4_IB_FLOW_QPN_MASK;
err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
IB_FLOW_DOMAIN_NIC,
MLX4_FS_REGULAR,
&mqp->reg_id);
} else {
err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
}
kfree(flow);
return err;
}
static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
{
struct mlx4_ib_dev *ibdev = ibdev_ptr;
int p, j;
int dev_idx, ret;
if (ibdev->iboe.nb_inet.notifier_call) {
if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb_inet.notifier_call = NULL;
}
mlx4_ib_close_sriov(ibdev);
sysfs_remove_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group);
mlx4_ib_mad_cleanup(ibdev);
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
device_remove_file(&ibdev->ib_dev.dev,
mlx4_class_attributes[j]);
}
dev_idx = -1;
if (dr_active && !(ibdev->dev->flags & MLX4_FLAG_DEV_NUM_STR)) {
ret = sscanf(ibdev->ib_dev.name, "mlx4_%d", &dev_idx);
if (ret != 1)
dev_idx = -1;
}
ib_unregister_device(&ibdev->ib_dev);
if (dev_idx >= 0) {
spin_lock(&dev_num_str_lock);
bitmap_release_region(dev_num_str_bitmap, dev_idx, 0);
spin_unlock(&dev_num_str_lock);
}
if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
kfree(ibdev->ib_uc_qpns_bitmap);
}
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
iounmap(ibdev->priv_uar.map);
for (p = 0; p < ibdev->num_ports; ++p) {
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, p + 1) ==
IB_LINK_LAYER_ETHERNET) {
mlx4_counter_free(ibdev->dev,
p + 1,
ibdev->counters[p].counter_index);
}
}
mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
mlx4_CLOSE_PORT(dev, p);
mlx4_ib_free_eqs(dev, ibdev);
mlx4_uar_free(dev, &ibdev->priv_uar);
mlx4_pd_free(dev, ibdev->priv_pdn);
ib_dealloc_device(&ibdev->ib_dev);
}
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
{
struct mlx4_ib_demux_work **dm = NULL;
struct mlx4_dev *dev = ibdev->dev;
int i;
unsigned long flags;
if (!mlx4_is_master(dev))
return;
dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
if (!dm) {
pr_err("failed to allocate memory for tunneling qp update\n");
goto out;
}
for (i = 0; i < dev->caps.num_ports; i++) {
dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
if (!dm[i]) {
pr_err("failed to allocate memory for tunneling qp update work struct\n");
for (i = 0; i < dev->caps.num_ports; i++) {
if (dm[i])
kfree(dm[i]);
}
goto out;
}
}
/* initialize or tear down tunnel QPs for the slave */
for (i = 0; i < dev->caps.num_ports; i++) {
INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
dm[i]->port = i + 1;
dm[i]->slave = slave;
dm[i]->do_init = do_init;
dm[i]->dev = ibdev;
spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
if (!ibdev->sriov.is_going_down)
queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
}
out:
if (dm)
kfree(dm);
return;
}
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
enum mlx4_dev_event event, unsigned long param)
{
struct ib_event ibev;
struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
struct mlx4_eqe *eqe = NULL;
struct ib_event_work *ew;
int p = 0;
if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
eqe = (struct mlx4_eqe *)param;
else
p = (int) param;
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
if (p > ibdev->num_ports)
return;
if (mlx4_is_master(dev) &&
rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
IB_LINK_LAYER_INFINIBAND) {
mlx4_ib_invalidate_all_guid_record(ibdev, p);
}
mlx4_ib_info((struct ib_device *) ibdev_ptr,
"Port %d logical link is up\n", p);
ibev.event = IB_EVENT_PORT_ACTIVE;
break;
case MLX4_DEV_EVENT_PORT_DOWN:
if (p > ibdev->num_ports)
return;
mlx4_ib_info((struct ib_device *) ibdev_ptr,
"Port %d logical link is down\n", p);
ibev.event = IB_EVENT_PORT_ERR;
break;
case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
break;
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
ew = kmalloc(sizeof *ew, GFP_ATOMIC);
if (!ew) {
pr_err("failed to allocate memory for events work\n");
break;
}
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
ew->ib_dev = ibdev;
/* need to queue only for port owner, which uses GEN_EQE */
if (mlx4_is_master(dev))
queue_work(wq, &ew->work);
else
handle_port_mgmt_change_event(&ew->work);
return;
case MLX4_DEV_EVENT_SLAVE_INIT:
/* here, p is the slave id */
do_slave_init(ibdev, p, 1);
return;
case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
/* here, p is the slave id */
do_slave_init(ibdev, p, 0);
return;
default:
return;
}
ibev.device = ibdev_ptr;
ibev.element.port_num = (u8) p;
ib_dispatch_event(&ibev);
}
static struct mlx4_interface mlx4_ib_interface = {
.add = mlx4_ib_add,
.remove = mlx4_ib_remove,
.event = mlx4_ib_event,
.protocol = MLX4_PROT_IB_IPV6
};
static int __init mlx4_ib_init(void)
{
int err;
wq = create_singlethread_workqueue("mlx4_ib");
if (!wq)
return -ENOMEM;
err = mlx4_ib_mcg_init();
if (err)
goto clean_proc;
init_dev_assign();
err = mlx4_register_interface(&mlx4_ib_interface);
if (err)
goto clean_mcg;
return 0;
clean_mcg:
mlx4_ib_mcg_destroy();
clean_proc:
destroy_workqueue(wq);
return err;
}
static void __exit mlx4_ib_cleanup(void)
{
mlx4_unregister_interface(&mlx4_ib_interface);
mlx4_ib_mcg_destroy();
destroy_workqueue(wq);
kfree(dev_num_str_bitmap);
}
module_init_order(mlx4_ib_init, SI_ORDER_MIDDLE);
module_exit(mlx4_ib_cleanup);
static int
mlx4ib_evhand(module_t mod, int event, void *arg)
{
return (0);
}
static moduledata_t mlx4ib_mod = {
.name = "mlx4ib",
.evhand = mlx4ib_evhand,
};
DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_LAST, SI_ORDER_ANY);
MODULE_DEPEND(mlx4ib, mlx4, 1, 1, 1);
MODULE_DEPEND(mlx4ib, ibcore, 1, 1, 1);
MODULE_DEPEND(mlx4ib, linuxkpi, 1, 1, 1);
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
index 6b8cf0cf40ec..ac1bb7a916a5 100644
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
@@ -1,2290 +1,2342 @@
/*-
* Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/vport.h>
#include <asm/pgtable.h>
#include <linux/fs.h>
#undef inode
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include "user.h"
#include "mlx5_ib.h"
#include <sys/unistd.h>
#include <sys/kthread.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "3.2-rc1"
#define DRIVER_RELDATE "May 2016"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DEPEND(mlx5ib, linuxkpi, 1, 1, 1);
MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
MODULE_VERSION(mlx5ib, 1);
static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
enum {
MLX5_STANDARD_ATOMIC_SIZE = 0x8,
};
struct workqueue_struct *mlx5_ib_wq;
static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
static void get_atomic_caps(struct mlx5_ib_dev *dev,
struct ib_device_attr *props)
{
int tmp;
u8 atomic_operations;
u8 atomic_size_qp;
u8 atomic_req_endianess;
atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
atomic_req_8B_endianess_mode) ||
!mlx5_host_is_le();
tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
if (((atomic_operations & tmp) == tmp)
&& (atomic_size_qp & 8)) {
if (atomic_req_endianess) {
props->atomic_cap = IB_ATOMIC_HCA;
} else {
props->atomic_cap = IB_ATOMIC_NONE;
}
} else {
props->atomic_cap = IB_ATOMIC_NONE;
}
tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
if (((atomic_operations & tmp) == tmp)
&&(atomic_size_qp & 8)) {
if (atomic_req_endianess)
props->masked_atomic_cap = IB_ATOMIC_HCA;
else {
props->masked_atomic_cap = IB_ATOMIC_NONE;
}
} else {
props->masked_atomic_cap = IB_ATOMIC_NONE;
}
}
static enum rdma_link_layer
mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(device);
switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
case MLX5_CAP_PORT_TYPE_IB:
return IB_LINK_LAYER_INFINIBAND;
case MLX5_CAP_PORT_TYPE_ETH:
return IB_LINK_LAYER_ETHERNET;
default:
return IB_LINK_LAYER_UNSPECIFIED;
}
}
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
return !dev->mdev->issi;
}
enum {
MLX5_VPORT_ACCESS_METHOD_MAD,
MLX5_VPORT_ACCESS_METHOD_HCA,
MLX5_VPORT_ACCESS_METHOD_NIC,
};
static int mlx5_get_vport_access_method(struct ib_device *ibdev)
{
if (mlx5_use_mad_ifc(to_mdev(ibdev)))
return MLX5_VPORT_ACCESS_METHOD_MAD;
if (mlx5_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET)
return MLX5_VPORT_ACCESS_METHOD_NIC;
return MLX5_VPORT_ACCESS_METHOD_HCA;
}
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
u64 tmp;
int err;
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_system_image_guid_mad_ifc(ibdev,
sys_image_guid);
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
if (!err)
*sys_image_guid = cpu_to_be64(tmp);
return err;
case MLX5_VPORT_ACCESS_METHOD_NIC:
err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
if (!err)
*sys_image_guid = cpu_to_be64(tmp);
return err;
default:
return -EINVAL;
}
}
static int mlx5_query_max_pkeys(struct ib_device *ibdev,
u16 *max_pkeys)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);
case MLX5_VPORT_ACCESS_METHOD_HCA:
case MLX5_VPORT_ACCESS_METHOD_NIC:
*max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
pkey_table_size));
return 0;
default:
return -EINVAL;
}
}
static int mlx5_query_vendor_id(struct ib_device *ibdev,
u32 *vendor_id)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);
case MLX5_VPORT_ACCESS_METHOD_HCA:
case MLX5_VPORT_ACCESS_METHOD_NIC:
return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
default:
return -EINVAL;
}
}
static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
__be64 *node_guid)
{
u64 tmp;
int err;
switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_node_guid_mad_ifc(dev, node_guid);
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
if (!err)
*node_guid = cpu_to_be64(tmp);
return err;
case MLX5_VPORT_ACCESS_METHOD_NIC:
err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
if (!err)
*node_guid = cpu_to_be64(tmp);
return err;
default:
return -EINVAL;
}
}
struct mlx5_reg_node_desc {
u8 desc[64];
};
static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
struct mlx5_reg_node_desc in;
if (mlx5_use_mad_ifc(dev))
return mlx5_query_node_desc_mad_ifc(dev, node_desc);
memset(&in, 0, sizeof(in));
return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
sizeof(struct mlx5_reg_node_desc),
MLX5_REG_NODE_DESC, 0, 0);
}
static int mlx5_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
int max_sq_desc;
int max_rq_sg;
int max_sq_sg;
int err;
memset(props, 0, sizeof(*props));
err = mlx5_query_system_image_guid(ibdev,
&props->sys_image_guid);
if (err)
return err;
err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
if (err)
return err;
err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
if (err)
return err;
props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
((u64)fw_rev_min(dev->mdev) << 16) |
fw_rev_sub(dev->mdev);
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN;
if (MLX5_CAP_GEN(mdev, pkv))
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (MLX5_CAP_GEN(mdev, qkv))
props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
if (MLX5_CAP_GEN(mdev, apm))
props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
if (MLX5_CAP_GEN(mdev, xrc))
props->device_cap_flags |= IB_DEVICE_XRC;
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
props->vendor_part_id = mdev->pdev->device;
props->hw_ver = mdev->pdev->revision;
props->max_mr_size = ~0ull;
props->page_size_cap = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) -1);
props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
sizeof(struct mlx5_wqe_data_seg);
max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
max_sq_sg = (max_sq_desc -
sizeof(struct mlx5_wqe_ctrl_seg) -
sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg);
props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len = (unsigned int)-1;
get_atomic_caps(dev, props);
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
props->max_ah = INT_MAX;
return 0;
}
enum mlx5_ib_width {
MLX5_IB_WIDTH_1X = 1 << 0,
MLX5_IB_WIDTH_2X = 1 << 1,
MLX5_IB_WIDTH_4X = 1 << 2,
MLX5_IB_WIDTH_8X = 1 << 3,
MLX5_IB_WIDTH_12X = 1 << 4
};
static int translate_active_width(struct ib_device *ibdev, u8 active_width,
u8 *ib_width)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
int err = 0;
if (active_width & MLX5_IB_WIDTH_1X) {
*ib_width = IB_WIDTH_1X;
} else if (active_width & MLX5_IB_WIDTH_2X) {
mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
(int)active_width);
err = -EINVAL;
} else if (active_width & MLX5_IB_WIDTH_4X) {
*ib_width = IB_WIDTH_4X;
} else if (active_width & MLX5_IB_WIDTH_8X) {
*ib_width = IB_WIDTH_8X;
} else if (active_width & MLX5_IB_WIDTH_12X) {
*ib_width = IB_WIDTH_12X;
} else {
mlx5_ib_dbg(dev, "Invalid active_width %d\n",
(int)active_width);
err = -EINVAL;
}
return err;
}
/*
* TODO: Move to IB core
*/
enum ib_max_vl_num {
__IB_MAX_VL_0 = 1,
__IB_MAX_VL_0_1 = 2,
__IB_MAX_VL_0_3 = 3,
__IB_MAX_VL_0_7 = 4,
__IB_MAX_VL_0_14 = 5,
};
enum mlx5_vl_hw_cap {
MLX5_VL_HW_0 = 1,
MLX5_VL_HW_0_1 = 2,
MLX5_VL_HW_0_2 = 3,
MLX5_VL_HW_0_3 = 4,
MLX5_VL_HW_0_4 = 5,
MLX5_VL_HW_0_5 = 6,
MLX5_VL_HW_0_6 = 7,
MLX5_VL_HW_0_7 = 8,
MLX5_VL_HW_0_14 = 15
};
static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
u8 *max_vl_num)
{
switch (vl_hw_cap) {
case MLX5_VL_HW_0:
*max_vl_num = __IB_MAX_VL_0;
break;
case MLX5_VL_HW_0_1:
*max_vl_num = __IB_MAX_VL_0_1;
break;
case MLX5_VL_HW_0_3:
*max_vl_num = __IB_MAX_VL_0_3;
break;
case MLX5_VL_HW_0_7:
*max_vl_num = __IB_MAX_VL_0_7;
break;
case MLX5_VL_HW_0_14:
*max_vl_num = __IB_MAX_VL_0_14;
break;
default:
return -EINVAL;
}
return 0;
}
static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
u32 *rep;
int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
struct mlx5_ptys_reg *ptys;
struct mlx5_pmtu_reg *pmtu;
struct mlx5_pvlc_reg pvlc;
void *ctx;
int err;
rep = mlx5_vzalloc(outlen);
ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
if (!rep || !ptys || !pmtu) {
err = -ENOMEM;
goto out;
}
memset(props, 0, sizeof(*props));
/* what if I am pf with dual port */
err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
if (err)
goto out;
ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);
props->lid = MLX5_GET(hca_vport_context, ctx, lid);
props->lmc = MLX5_GET(hca_vport_context, ctx, lmc);
props->sm_lid = MLX5_GET(hca_vport_context, ctx, sm_lid);
props->sm_sl = MLX5_GET(hca_vport_context, ctx, sm_sl);
props->state = MLX5_GET(hca_vport_context, ctx, vport_state);
props->phys_state = MLX5_GET(hca_vport_context, ctx,
port_physical_state);
props->port_cap_flags = MLX5_GET(hca_vport_context, ctx, cap_mask1);
props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
props->bad_pkey_cntr = MLX5_GET(hca_vport_context, ctx,
pkey_violation_counter);
props->qkey_viol_cntr = MLX5_GET(hca_vport_context, ctx,
qkey_violation_counter);
props->subnet_timeout = MLX5_GET(hca_vport_context, ctx,
subnet_timeout);
props->init_type_reply = MLX5_GET(hca_vport_context, ctx,
init_type_reply);
ptys->proto_mask |= MLX5_PTYS_IB;
ptys->local_port = port;
err = mlx5_core_access_ptys(mdev, ptys, 0);
if (err)
goto out;
err = translate_active_width(ibdev, ptys->ib_link_width_oper,
&props->active_width);
if (err)
goto out;
props->active_speed = (u8)ptys->ib_proto_oper;
pmtu->local_port = port;
err = mlx5_core_access_pmtu(mdev, pmtu, 0);
if (err)
goto out;
props->max_mtu = pmtu->max_mtu;
props->active_mtu = pmtu->oper_mtu;
memset(&pvlc, 0, sizeof(pvlc));
pvlc.local_port = port;
err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
if (err)
goto out;
err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
&props->max_vl_num);
out:
kvfree(rep);
kfree(ptys);
kfree(pmtu);
return err;
}
int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_port_mad_ifc(ibdev, port, props);
case MLX5_VPORT_ACCESS_METHOD_HCA:
return mlx5_query_port_ib(ibdev, port, props);
case MLX5_VPORT_ACCESS_METHOD_NIC:
return mlx5_query_port_roce(ibdev, port, props);
default:
return -EINVAL;
}
}
static inline int
mlx5_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
{
if (dev->if_addrlen != ETH_ALEN)
return -1;
memcpy(eui, IF_LLADDR(dev), 3);
memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
/* NOTE: The scope ID is added by the GID to IP conversion */
eui[3] = 0xFF;
eui[4] = 0xFE;
eui[0] ^= 2;
return 0;
}
static void
mlx5_make_default_gid(struct net_device *dev, union ib_gid *gid)
{
gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
mlx5_addrconf_ifid_eui48(&gid->raw[8], dev);
}
static void
mlx5_ib_roce_port_update(void *arg)
{
struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg;
struct mlx5_ib_dev *dev = port->dev;
struct mlx5_core_dev *mdev = dev->mdev;
struct net_device *xdev[MLX5_IB_GID_MAX];
struct net_device *idev;
struct net_device *ndev;
struct ifaddr *ifa;
union ib_gid gid_temp;
while (port->port_gone == 0) {
int update = 0;
int gid_index = 0;
int j;
int error;
ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH);
if (ndev == NULL) {
pause("W", hz);
continue;
}
CURVNET_SET_QUIET(ndev->if_vnet);
memset(&gid_temp, 0, sizeof(gid_temp));
mlx5_make_default_gid(ndev, &gid_temp);
if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
port->gid_table[gid_index] = gid_temp;
update = 1;
}
xdev[gid_index] = ndev;
gid_index++;
IFNET_RLOCK();
TAILQ_FOREACH(idev, &V_ifnet, if_link) {
if (idev == ndev)
break;
}
if (idev != NULL) {
TAILQ_FOREACH(idev, &V_ifnet, if_link) {
if (idev != ndev) {
if (idev->if_type != IFT_L2VLAN)
continue;
if (ndev != rdma_vlan_dev_real_dev(idev))
continue;
}
/* clone address information for IPv4 and IPv6 */
IF_ADDR_RLOCK(idev);
TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
if (ifa->ifa_addr == NULL ||
(ifa->ifa_addr->sa_family != AF_INET &&
ifa->ifa_addr->sa_family != AF_INET6) ||
gid_index >= MLX5_IB_GID_MAX)
continue;
memset(&gid_temp, 0, sizeof(gid_temp));
rdma_ip2gid(ifa->ifa_addr, &gid_temp);
/* check for existing entry */
for (j = 0; j != gid_index; j++) {
if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0)
break;
}
/* check if new entry must be added */
if (j == gid_index) {
if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
port->gid_table[gid_index] = gid_temp;
update = 1;
}
xdev[gid_index] = idev;
gid_index++;
}
}
IF_ADDR_RUNLOCK(idev);
}
}
IFNET_RUNLOCK();
CURVNET_RESTORE();
if (update != 0 &&
mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) {
struct ib_event event = {
.device = &dev->ib_dev,
.element.port_num = port->port_num + 1,
.event = IB_EVENT_GID_CHANGE,
};
/* add new entries, if any */
for (j = 0; j != gid_index; j++) {
error = modify_gid_roce(&dev->ib_dev, port->port_num, j,
port->gid_table + j, xdev[j]);
if (error != 0)
printf("mlx5_ib: Failed to update ROCE GID table: %d\n", error);
}
memset(&gid_temp, 0, sizeof(gid_temp));
/* clear old entries, if any */
for (; j != MLX5_IB_GID_MAX; j++) {
if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0)
continue;
port->gid_table[j] = gid_temp;
(void) modify_gid_roce(&dev->ib_dev, port->port_num, j,
port->gid_table + j, ndev);
}
/* make sure ibcore gets updated */
ib_dispatch_event(&event);
}
pause("W", hz);
}
do {
struct ib_event event = {
.device = &dev->ib_dev,
.element.port_num = port->port_num + 1,
.event = IB_EVENT_GID_CHANGE,
};
/* make sure ibcore gets updated */
ib_dispatch_event(&event);
/* wait a bit */
pause("W", hz);
} while (0);
port->port_gone = 2;
kthread_exit();
}
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);
case MLX5_VPORT_ACCESS_METHOD_HCA:
return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);
case MLX5_VPORT_ACCESS_METHOD_NIC:
if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
index < 0 || index >= MLX5_IB_GID_MAX ||
dev->port[port - 1].port_gone != 0)
memset(gid, 0, sizeof(*gid));
else
*gid = dev->port[port - 1].gid_table[index];
return 0;
default:
return -EINVAL;
}
}
static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);
case MLX5_VPORT_ACCESS_METHOD_HCA:
case MLX5_VPORT_ACCESS_METHOD_NIC:
return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
pkey);
default:
return -EINVAL;
}
}
static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
struct ib_device_modify *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_reg_node_desc in;
struct mlx5_reg_node_desc out;
int err;
if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
return -EOPNOTSUPP;
if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
return 0;
/*
* If possible, pass node desc to FW, so it can generate
* a 144 trap. If cmd fails, just ignore.
*/
memcpy(&in, props->node_desc, 64);
err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
if (err)
return err;
memcpy(ibdev->node_desc, props->node_desc, 64);
return err;
}
static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
IB_LINK_LAYER_ETHERNET);
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct ib_port_attr attr;
u32 tmp;
int err;
/* return OK if this is RoCE. CM calls ib_modify_port() regardless
* of whether port link layer is ETH or IB. For ETH ports, qkey
* violations and port capabilities are not valid.
*/
if (is_eth)
return 0;
mutex_lock(&dev->cap_mask_mutex);
err = mlx5_ib_query_port(ibdev, port, &attr);
if (err)
goto out;
tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
~props->clr_port_cap_mask;
err = mlx5_set_port_caps(dev->mdev, port, tmp);
out:
mutex_unlock(&dev->cap_mask_mutex);
return err;
}
enum mlx5_cap_flags {
MLX5_CAP_COMPACT_AV = 1 << 0,
};
static void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
{
*flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
MLX5_CAP_COMPACT_AV : 0;
}
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_alloc_ucontext_req_v2 req;
struct mlx5_ib_alloc_ucontext_resp resp;
struct mlx5_ib_ucontext *context;
struct mlx5_uuar_info *uuari;
struct mlx5_uar *uars;
int gross_uuars;
int num_uars;
int ver;
int uuarn;
int err;
int i;
size_t reqlen;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
memset(&req, 0, sizeof(req));
memset(&resp, 0, sizeof(resp));
reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
ver = 0;
else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
ver = 2;
else {
mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen);
return ERR_PTR(-EINVAL);
}
err = ib_copy_from_udata(&req, udata, reqlen);
if (err) {
mlx5_ib_err(dev, "copy failed\n");
return ERR_PTR(err);
}
if (req.reserved) {
mlx5_ib_err(dev, "request corrupted\n");
return ERR_PTR(-EINVAL);
}
if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
return ERR_PTR(-ENOMEM);
}
req.total_num_uuars = ALIGN(req.total_num_uuars,
MLX5_NON_FP_BF_REGS_PER_PAGE);
if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
req.total_num_uuars, req.total_num_uuars);
return ERR_PTR(-EINVAL);
}
num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
resp.cache_line_size = L1_CACHE_BYTES;
resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
set_mlx5_flags(&resp.flags, dev->mdev);
if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);
if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
uuari = &context->uuari;
mutex_init(&uuari->lock);
uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
if (!uars) {
err = -ENOMEM;
goto out_ctx;
}
uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
sizeof(*uuari->bitmap),
GFP_KERNEL);
if (!uuari->bitmap) {
err = -ENOMEM;
goto out_uar_ctx;
}
/*
* clear all fast path uuars
*/
for (i = 0; i < gross_uuars; i++) {
uuarn = i & 3;
if (uuarn == 2 || uuarn == 3)
set_bit(i, uuari->bitmap);
}
uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
if (!uuari->count) {
err = -ENOMEM;
goto out_bitmap;
}
for (i = 0; i < num_uars; i++) {
err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
if (err) {
mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
goto out_uars;
}
}
for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
resp.tot_uuars = req.total_num_uuars;
resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
err = ib_copy_to_udata(udata, &resp,
min_t(size_t, udata->outlen, sizeof(resp)));
if (err)
goto out_uars;
uuari->ver = ver;
uuari->num_low_latency_uuars = req.num_low_latency_uuars;
uuari->uars = uars;
uuari->num_uars = num_uars;
if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) {
err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
if (err)
goto out_uars;
}
return &context->ibucontext;
out_uars:
for (i--; i >= 0; i--)
mlx5_cmd_free_uar(dev->mdev, uars[i].index);
kfree(uuari->count);
out_bitmap:
kfree(uuari->bitmap);
out_uar_ctx:
kfree(uars);
out_ctx:
kfree(context);
return ERR_PTR(err);
}
static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_uuar_info *uuari = &context->uuari;
int i;
if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET)
mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
for (i = 0; i < uuari->num_uars; i++) {
if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
}
for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
}
kfree(uuari->count);
kfree(uuari->bitmap);
kfree(uuari->uars);
kfree(context);
return 0;
}
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
{
return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
}
static int get_command(unsigned long offset)
{
return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
}
static int get_arg(unsigned long offset)
{
return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
}
static int get_index(unsigned long offset)
{
return get_arg(offset);
}
static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
struct mlx5_ib_ucontext *context)
{
unsigned long idx;
phys_addr_t pfn;
if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
(long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
return -EINVAL;
}
idx = get_index(vma->vm_pgoff);
if (idx >= uuari->num_uars) {
mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
idx, uuari->num_uars);
return -EINVAL;
}
pfn = uar_index2pfn(dev, uuari->uars[idx].index);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
(unsigned long long)pfn);
vma->vm_page_prot = prot;
if (io_remap_pfn_range(vma, vma->vm_start, pfn,
PAGE_SIZE, vma->vm_page_prot)) {
mlx5_ib_err(dev, "io remap failed\n");
return -EAGAIN;
}
mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
(long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);
return 0;
}
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_uuar_info *uuari = &context->uuari;
unsigned long command;
command = get_command(vma->vm_pgoff);
switch (command) {
case MLX5_IB_MMAP_REGULAR_PAGE:
return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
true,
uuari, dev, context);
break;
case MLX5_IB_MMAP_WC_PAGE:
return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
true, uuari, dev, context);
break;
case MLX5_IB_MMAP_NC_PAGE:
return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
false, uuari, dev, context);
break;
default:
return -EINVAL;
}
return 0;
}
static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
{
struct mlx5_create_mkey_mbox_in *in;
struct mlx5_mkey_seg *seg;
struct mlx5_core_mr mr;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return -ENOMEM;
seg = &in->seg;
seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
seg->start_addr = 0;
err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
NULL, NULL, NULL);
if (err) {
mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
goto err_in;
}
kfree(in);
*key = mr.key;
return 0;
err_in:
kfree(in);
return err;
}
static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
{
struct mlx5_core_mr mr;
int err;
memset(&mr, 0, sizeof(mr));
mr.key = key;
err = mlx5_core_destroy_mkey(dev->mdev, &mr);
if (err)
mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
}
static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_alloc_pd_resp resp;
struct mlx5_ib_pd *pd;
int err;
pd = kmalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
if (err) {
mlx5_ib_warn(dev, "pd alloc failed\n");
kfree(pd);
return ERR_PTR(err);
}
if (context) {
resp.pdn = pd->pdn;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
mlx5_ib_err(dev, "copy failed\n");
mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
kfree(pd);
return ERR_PTR(-EFAULT);
}
} else {
err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
if (err) {
mlx5_ib_err(dev, "alloc mkey failed\n");
mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
kfree(pd);
return ERR_PTR(err);
}
}
return &pd->ibpd;
}
static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
{
struct mlx5_ib_dev *mdev = to_mdev(pd->device);
struct mlx5_ib_pd *mpd = to_mpd(pd);
if (!pd->uobject)
free_pa_mkey(mdev, mpd->pa_lkey);
mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
kfree(mpd);
return 0;
}
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
int err;
if (ibqp->qp_type == IB_QPT_RAW_PACKET)
err = -EOPNOTSUPP;
else
err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
if (err)
mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
ibqp->qp_num, gid->raw);
return err;
}
static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
int err;
if (ibqp->qp_type == IB_QPT_RAW_PACKET)
err = -EOPNOTSUPP;
else
err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
if (err)
mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
ibqp->qp_num, gid->raw);
return err;
}
static int init_node_data(struct mlx5_ib_dev *dev)
{
int err;
err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
if (err)
return err;
return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
}
static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
}
static ssize_t show_reg_pages(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
}
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
}
static ssize_t show_board(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
dev->mdev->board_id);
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
static struct device_attribute *mlx5_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id,
&dev_attr_fw_pages,
&dev_attr_reg_pages,
};
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
{
struct mlx5_ib_qp *mqp;
struct mlx5_ib_cq *send_mcq, *recv_mcq;
struct mlx5_core_cq *mcq;
struct list_head cq_armed_list;
unsigned long flags_qp;
unsigned long flags_cq;
unsigned long flags;
mlx5_ib_warn(ibdev, " started\n");
INIT_LIST_HEAD(&cq_armed_list);
/* Go over qp list reside on that ibdev, sync with create/destroy qp.*/
spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
spin_lock_irqsave(&mqp->sq.lock, flags_qp);
if (mqp->sq.tail != mqp->sq.head) {
send_mcq = to_mcq(mqp->ibqp.send_cq);
spin_lock_irqsave(&send_mcq->lock, flags_cq);
if (send_mcq->mcq.comp &&
mqp->ibqp.send_cq->comp_handler) {
if (!send_mcq->mcq.reset_notify_added) {
send_mcq->mcq.reset_notify_added = 1;
list_add_tail(&send_mcq->mcq.reset_notify,
&cq_armed_list);
}
}
spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
}
spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
spin_lock_irqsave(&mqp->rq.lock, flags_qp);
/* no handling is needed for SRQ */
if (!mqp->ibqp.srq) {
if (mqp->rq.tail != mqp->rq.head) {
recv_mcq = to_mcq(mqp->ibqp.recv_cq);
spin_lock_irqsave(&recv_mcq->lock, flags_cq);
if (recv_mcq->mcq.comp &&
mqp->ibqp.recv_cq->comp_handler) {
if (!recv_mcq->mcq.reset_notify_added) {
recv_mcq->mcq.reset_notify_added = 1;
list_add_tail(&recv_mcq->mcq.reset_notify,
&cq_armed_list);
}
}
spin_unlock_irqrestore(&recv_mcq->lock,
flags_cq);
}
}
spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
}
/*At that point all inflight post send were put to be executed as of we
* lock/unlock above locks Now need to arm all involved CQs.
*/
list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
mcq->comp(mcq);
}
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
mlx5_ib_warn(ibdev, " ended\n");
return;
}
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
struct ib_event ibev;
u8 port = 0;
switch (event) {
case MLX5_DEV_EVENT_SYS_ERROR:
ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
mlx5_ib_handle_internal_error(ibdev);
break;
case MLX5_DEV_EVENT_PORT_UP:
ibev.event = IB_EVENT_PORT_ACTIVE;
port = (u8)param;
break;
case MLX5_DEV_EVENT_PORT_DOWN:
case MLX5_DEV_EVENT_PORT_INITIALIZED:
ibev.event = IB_EVENT_PORT_ERR;
port = (u8)param;
break;
case MLX5_DEV_EVENT_LID_CHANGE:
ibev.event = IB_EVENT_LID_CHANGE;
port = (u8)param;
break;
case MLX5_DEV_EVENT_PKEY_CHANGE:
ibev.event = IB_EVENT_PKEY_CHANGE;
port = (u8)param;
break;
case MLX5_DEV_EVENT_GUID_CHANGE:
ibev.event = IB_EVENT_GID_CHANGE;
port = (u8)param;
break;
case MLX5_DEV_EVENT_CLIENT_REREG:
ibev.event = IB_EVENT_CLIENT_REREGISTER;
port = (u8)param;
break;
default:
break;
}
ibev.device = &ibdev->ib_dev;
ibev.element.port_num = port;
if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
(port < 1 || port > ibdev->num_ports)) {
mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
return;
}
if (ibdev->ib_active)
ib_dispatch_event(&ibev);
}
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
int port;
for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
mlx5_query_ext_port_caps(dev, port);
}
static void config_atomic_responder(struct mlx5_ib_dev *dev,
struct ib_device_attr *props)
{
enum ib_atomic_cap cap = props->atomic_cap;
#if 0
if (cap == IB_ATOMIC_HCA ||
cap == IB_ATOMIC_GLOB)
#endif
dev->enable_atomic_resp = 1;
dev->atomic_cap = cap;
}
enum mlx5_addr_align {
MLX5_ADDR_ALIGN_0 = 0,
MLX5_ADDR_ALIGN_64 = 64,
MLX5_ADDR_ALIGN_128 = 128,
};
static int get_port_caps(struct mlx5_ib_dev *dev)
{
struct ib_device_attr *dprops = NULL;
struct ib_port_attr *pprops = NULL;
int err = -ENOMEM;
int port;
pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
if (!pprops)
goto out;
dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
if (!dprops)
goto out;
err = mlx5_ib_query_device(&dev->ib_dev, dprops);
if (err) {
mlx5_ib_warn(dev, "query_device failed %d\n", err);
goto out;
}
config_atomic_responder(dev, dprops);
for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
if (err) {
mlx5_ib_warn(dev, "query_port %d failed %d\n",
port, err);
break;
}
dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
dprops->max_pkeys, pprops->gid_tbl_len);
}
out:
kfree(pprops);
kfree(dprops);
return err;
}
static void destroy_umrc_res(struct mlx5_ib_dev *dev)
{
int err;
err = mlx5_mr_cache_cleanup(dev);
if (err)
mlx5_ib_warn(dev, "mr cache cleanup failed\n");
ib_dereg_mr(dev->umrc.mr);
ib_dealloc_pd(dev->umrc.pd);
}
enum {
MAX_UMR_WR = 128,
};
static int create_umr_res(struct mlx5_ib_dev *dev)
{
struct ib_pd *pd;
struct ib_mr *mr;
int ret;
pd = ib_alloc_pd(&dev->ib_dev);
if (IS_ERR(pd)) {
mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
ret = PTR_ERR(pd);
goto error_0;
}
mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(mr)) {
mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
ret = PTR_ERR(mr);
goto error_1;
}
dev->umrc.mr = mr;
dev->umrc.pd = pd;
ret = mlx5_mr_cache_init(dev);
if (ret) {
mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
goto error_4;
}
return 0;
error_4:
ib_dereg_mr(mr);
error_1:
ib_dealloc_pd(pd);
error_0:
return ret;
}
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
struct ib_srq_init_attr attr;
struct mlx5_ib_dev *dev;
int ret = 0;
struct ib_cq_init_attr cq_attr = { .cqe = 1 };
dev = container_of(devr, struct mlx5_ib_dev, devr);
devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
if (IS_ERR(devr->p0)) {
ret = PTR_ERR(devr->p0);
goto error0;
}
devr->p0->device = &dev->ib_dev;
devr->p0->uobject = NULL;
atomic_set(&devr->p0->usecnt, 0);
devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
if (IS_ERR(devr->c0)) {
ret = PTR_ERR(devr->c0);
goto error1;
}
devr->c0->device = &dev->ib_dev;
devr->c0->uobject = NULL;
devr->c0->comp_handler = NULL;
devr->c0->event_handler = NULL;
devr->c0->cq_context = NULL;
atomic_set(&devr->c0->usecnt, 0);
devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
if (IS_ERR(devr->x0)) {
ret = PTR_ERR(devr->x0);
goto error2;
}
devr->x0->device = &dev->ib_dev;
devr->x0->inode = NULL;
atomic_set(&devr->x0->usecnt, 0);
mutex_init(&devr->x0->tgt_qp_mutex);
INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
if (IS_ERR(devr->x1)) {
ret = PTR_ERR(devr->x1);
goto error3;
}
devr->x1->device = &dev->ib_dev;
devr->x1->inode = NULL;
atomic_set(&devr->x1->usecnt, 0);
mutex_init(&devr->x1->tgt_qp_mutex);
INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
memset(&attr, 0, sizeof(attr));
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_XRC;
attr.ext.xrc.cq = devr->c0;
attr.ext.xrc.xrcd = devr->x0;
devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
if (IS_ERR(devr->s0)) {
ret = PTR_ERR(devr->s0);
goto error4;
}
devr->s0->device = &dev->ib_dev;
devr->s0->pd = devr->p0;
devr->s0->uobject = NULL;
devr->s0->event_handler = NULL;
devr->s0->srq_context = NULL;
devr->s0->srq_type = IB_SRQT_XRC;
devr->s0->ext.xrc.xrcd = devr->x0;
devr->s0->ext.xrc.cq = devr->c0;
atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s0->usecnt, 0);
memset(&attr, 0, sizeof(attr));
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_BASIC;
devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
if (IS_ERR(devr->s1)) {
ret = PTR_ERR(devr->s1);
goto error5;
}
devr->s1->device = &dev->ib_dev;
devr->s1->pd = devr->p0;
devr->s1->uobject = NULL;
devr->s1->event_handler = NULL;
devr->s1->srq_context = NULL;
devr->s1->srq_type = IB_SRQT_BASIC;
devr->s1->ext.xrc.cq = devr->c0;
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s1->usecnt, 0);
return 0;
error5:
mlx5_ib_destroy_srq(devr->s0);
error4:
mlx5_ib_dealloc_xrcd(devr->x1);
error3:
mlx5_ib_dealloc_xrcd(devr->x0);
error2:
mlx5_ib_destroy_cq(devr->c0);
error1:
mlx5_ib_dealloc_pd(devr->p0);
error0:
return ret;
}
static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
mlx5_ib_destroy_srq(devr->s1);
mlx5_ib_destroy_srq(devr->s0);
mlx5_ib_dealloc_xrcd(devr->x0);
mlx5_ib_dealloc_xrcd(devr->x1);
mlx5_ib_destroy_cq(devr->c0);
mlx5_ib_dealloc_pd(devr->p0);
}
+static u32 get_core_cap_flags(struct ib_device *ibdev)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
+ u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
+ u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
+ u32 ret = 0;
+
+ if (ll == IB_LINK_LAYER_INFINIBAND)
+ return RDMA_CORE_PORT_IBA_IB;
+
+ ret = RDMA_CORE_PORT_RAW_PACKET;
+
+ if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
+ return ret;
+
+ if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
+ return ret;
+
+ if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
+ ret |= RDMA_CORE_PORT_IBA_ROCE;
+
+ if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
+ ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ return ret;
+}
+
+static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
+ int err;
+
+ immutable->core_cap_flags = get_core_cap_flags(ibdev);
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = get_core_cap_flags(ibdev);
+ if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+
+ return 0;
+}
+
static void enable_dc_tracer(struct mlx5_ib_dev *dev)
{
struct device *device = dev->ib_dev.dma_device;
struct mlx5_dc_tracer *dct = &dev->dctr;
int order;
void *tmp;
int size;
int err;
size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
if (size <= PAGE_SIZE)
order = 0;
else
order = 1;
dct->pg = alloc_pages(GFP_KERNEL, order);
if (!dct->pg) {
mlx5_ib_err(dev, "failed to allocate %d pages\n", order);
return;
}
tmp = page_address(dct->pg);
memset(tmp, 0xff, size);
dct->size = size;
dct->order = order;
dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
if (dma_mapping_error(device, dct->dma)) {
mlx5_ib_err(dev, "dma mapping error\n");
goto map_err;
}
err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
if (err) {
mlx5_ib_warn(dev, "failed to enable DC tracer\n");
goto cmd_err;
}
return;
cmd_err:
dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
map_err:
__free_pages(dct->pg, dct->order);
dct->pg = NULL;
}
static void disable_dc_tracer(struct mlx5_ib_dev *dev)
{
struct device *device = dev->ib_dev.dma_device;
struct mlx5_dc_tracer *dct = &dev->dctr;
int err;
if (!dct->pg)
return;
err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
if (err) {
mlx5_ib_warn(dev, "failed to disable DC tracer\n");
return;
}
dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
__free_pages(dct->pg, dct->order);
dct->pg = NULL;
}
enum {
MLX5_DC_CNAK_SIZE = 128,
MLX5_NUM_BUF_IN_PAGE = PAGE_SIZE / MLX5_DC_CNAK_SIZE,
MLX5_CNAK_TX_CQ_SIGNAL_FACTOR = 128,
MLX5_DC_CNAK_SL = 0,
MLX5_DC_CNAK_VL = 0,
};
static int init_dc_improvements(struct mlx5_ib_dev *dev)
{
if (!mlx5_core_is_pf(dev->mdev))
return 0;
if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
return 0;
enable_dc_tracer(dev);
return 0;
}
static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
{
disable_dc_tracer(dev);
}
static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
{
mlx5_vport_dealloc_q_counter(dev->mdev,
MLX5_INTERFACE_PROTOCOL_IB,
dev->port[port_num].q_cnt_id);
dev->port[port_num].q_cnt_id = 0;
}
static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
for (i = 0; i < dev->num_ports; i++)
mlx5_ib_dealloc_q_port_counter(dev, i);
}
static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
{
int i;
int ret;
for (i = 0; i < dev->num_ports; i++) {
ret = mlx5_vport_alloc_q_counter(dev->mdev,
MLX5_INTERFACE_PROTOCOL_IB,
&dev->port[i].q_cnt_id);
if (ret) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d\n",
i + 1);
goto dealloc_counters;
}
}
return 0;
dealloc_counters:
while (--i >= 0)
mlx5_ib_dealloc_q_port_counter(dev, i);
return ret;
}
struct port_attribute {
struct attribute attr;
ssize_t (*show)(struct mlx5_ib_port *,
struct port_attribute *, char *buf);
ssize_t (*store)(struct mlx5_ib_port *,
struct port_attribute *,
const char *buf, size_t count);
};
struct port_counter_attribute {
struct port_attribute attr;
size_t offset;
};
static ssize_t port_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct port_attribute *port_attr =
container_of(attr, struct port_attribute, attr);
struct mlx5_ib_port_sysfs_group *p =
container_of(kobj, struct mlx5_ib_port_sysfs_group,
kobj);
struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
group);
if (!port_attr->show)
return -EIO;
return port_attr->show(mibport, port_attr, buf);
}
static ssize_t show_port_counter(struct mlx5_ib_port *p,
struct port_attribute *port_attr,
char *buf)
{
int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
struct port_counter_attribute *counter_attr =
container_of(port_attr, struct port_counter_attribute, attr);
void *out;
int ret;
out = mlx5_vzalloc(outlen);
if (!out)
return -ENOMEM;
ret = mlx5_vport_query_q_counter(p->dev->mdev,
p->q_cnt_id, 0,
out, outlen);
if (ret)
goto free;
ret = sprintf(buf, "%d\n",
be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));
free:
kfree(out);
return ret;
}
#define PORT_COUNTER_ATTR(_name) \
struct port_counter_attribute port_counter_attr_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_port_counter, NULL), \
.offset = MLX5_BYTE_OFF(query_q_counter_out, _name) \
}
static PORT_COUNTER_ATTR(rx_write_requests);
static PORT_COUNTER_ATTR(rx_read_requests);
static PORT_COUNTER_ATTR(rx_atomic_requests);
static PORT_COUNTER_ATTR(rx_dct_connect);
static PORT_COUNTER_ATTR(out_of_buffer);
static PORT_COUNTER_ATTR(out_of_sequence);
static PORT_COUNTER_ATTR(duplicate_request);
static PORT_COUNTER_ATTR(rnr_nak_retry_err);
static PORT_COUNTER_ATTR(packet_seq_err);
static PORT_COUNTER_ATTR(implied_nak_seq_err);
static PORT_COUNTER_ATTR(local_ack_timeout_err);
static struct attribute *counter_attrs[] = {
&port_counter_attr_rx_write_requests.attr.attr,
&port_counter_attr_rx_read_requests.attr.attr,
&port_counter_attr_rx_atomic_requests.attr.attr,
&port_counter_attr_rx_dct_connect.attr.attr,
&port_counter_attr_out_of_buffer.attr.attr,
&port_counter_attr_out_of_sequence.attr.attr,
&port_counter_attr_duplicate_request.attr.attr,
&port_counter_attr_rnr_nak_retry_err.attr.attr,
&port_counter_attr_packet_seq_err.attr.attr,
&port_counter_attr_implied_nak_seq_err.attr.attr,
&port_counter_attr_local_ack_timeout_err.attr.attr,
NULL
};
static struct attribute_group port_counters_group = {
.name = "counters",
.attrs = counter_attrs
};
static const struct sysfs_ops port_sysfs_ops = {
.show = port_attr_show
};
static struct kobj_type port_type = {
.sysfs_ops = &port_sysfs_ops,
};
static int add_port_attrs(struct mlx5_ib_dev *dev,
struct kobject *parent,
struct mlx5_ib_port_sysfs_group *port,
u8 port_num)
{
int ret;
ret = kobject_init_and_add(&port->kobj, &port_type,
parent,
"%d", port_num);
if (ret)
return ret;
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
ret = sysfs_create_group(&port->kobj, &port_counters_group);
if (ret)
goto put_kobj;
}
port->enabled = true;
return ret;
put_kobj:
kobject_put(&port->kobj);
return ret;
}
static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
unsigned int num_ports)
{
unsigned int i;
for (i = 0; i < num_ports; i++) {
struct mlx5_ib_port_sysfs_group *port =
&dev->port[i].group;
if (!port->enabled)
continue;
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
sysfs_remove_group(&port->kobj,
&port_counters_group);
kobject_put(&port->kobj);
port->enabled = false;
}
if (dev->ports_parent) {
kobject_put(dev->ports_parent);
dev->ports_parent = NULL;
}
}
static int create_port_attrs(struct mlx5_ib_dev *dev)
{
int ret = 0;
unsigned int i = 0;
struct device *device = &dev->ib_dev.dev;
dev->ports_parent = kobject_create_and_add("mlx5_ports",
&device->kobj);
if (!dev->ports_parent)
return -ENOMEM;
for (i = 0; i < dev->num_ports; i++) {
ret = add_port_attrs(dev,
dev->ports_parent,
&dev->port[i].group,
i + 1);
if (ret)
goto _destroy_ports_attrs;
}
return 0;
_destroy_ports_attrs:
destroy_ports_attrs(dev, i);
return ret;
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
int err;
int i;
printk_once(KERN_INFO "%s", mlx5_version);
dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
if (!dev)
return NULL;
dev->mdev = mdev;
dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
GFP_KERNEL);
if (!dev->port)
goto err_dealloc;
for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
dev->port[i].dev = dev;
dev->port[i].port_num = i;
dev->port[i].port_gone = 0;
memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table));
}
err = get_port_caps(dev);
if (err)
goto err_free_port;
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) {
if (MLX5_CAP_GEN(mdev, roce)) {
err = mlx5_nic_vport_enable_roce(mdev);
if (err)
goto err_free_port;
} else {
goto err_free_port;
}
}
MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = mdev->special_contexts.resd_lkey;
dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors =
dev->mdev->priv.eq_table.num_comp_vectors;
dev->ib_dev.dma_device = &mdev->pdev->dev;
dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
(1ull << IB_USER_VERBS_CMD_OPEN_QP);
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
dev->ib_dev.modify_device = mlx5_ib_modify_device;
dev->ib_dev.modify_port = mlx5_ib_modify_port;
dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
dev->ib_dev.mmap = mlx5_ib_mmap;
dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
dev->ib_dev.create_ah = mlx5_ib_create_ah;
dev->ib_dev.query_ah = mlx5_ib_query_ah;
dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
dev->ib_dev.create_srq = mlx5_ib_create_srq;
dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
dev->ib_dev.query_srq = mlx5_ib_query_srq;
dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
dev->ib_dev.create_qp = mlx5_ib_create_qp;
dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
dev->ib_dev.query_qp = mlx5_ib_query_qp;
dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
dev->ib_dev.post_send = mlx5_ib_post_send;
dev->ib_dev.post_recv = mlx5_ib_post_recv;
dev->ib_dev.create_cq = mlx5_ib_create_cq;
dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
dev->ib_dev.reg_phys_mr = mlx5_ib_reg_phys_mr;
dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
dev->ib_dev.process_mad = mlx5_ib_process_mad;
+ dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
if (MLX5_CAP_GEN(mdev, xrc)) {
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
dev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
err = init_node_data(dev);
if (err)
goto err_disable_roce;
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
err = create_dev_resources(&dev->devr);
if (err)
goto err_disable_roce;
err = mlx5_ib_alloc_q_counters(dev);
if (err)
goto err_odp;
err = ib_register_device(&dev->ib_dev, NULL);
if (err)
goto err_q_cnt;
err = create_umr_res(dev);
if (err)
goto err_dev;
if (MLX5_CAP_GEN(dev->mdev, port_type) ==
MLX5_CAP_PORT_TYPE_IB) {
if (init_dc_improvements(dev))
mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
}
err = create_port_attrs(dev);
if (err)
goto err_dc;
for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
err = device_create_file(&dev->ib_dev.dev,
mlx5_class_attributes[i]);
if (err)
goto err_port_attrs;
}
if (1) {
struct thread *rl_thread = NULL;
struct proc *rl_proc = NULL;
for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
(void) kproc_kthread_add(mlx5_ib_roce_port_update, dev->port + i, &rl_proc, &rl_thread,
RFHIGHPID, 0, "mlx5-ib-roce-port", "mlx5-ib-roce_port-%d", i);
}
}
dev->ib_active = true;
return dev;
err_port_attrs:
destroy_ports_attrs(dev, dev->num_ports);
err_dc:
if (MLX5_CAP_GEN(dev->mdev, port_type) ==
MLX5_CAP_PORT_TYPE_IB)
cleanup_dc_improvements(dev);
destroy_umrc_res(dev);
err_dev:
ib_unregister_device(&dev->ib_dev);
err_q_cnt:
mlx5_ib_dealloc_q_counters(dev);
err_odp:
destroy_dev_resources(&dev->devr);
err_disable_roce:
if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
mlx5_nic_vport_disable_roce(mdev);
err_free_port:
kfree(dev->port);
err_dealloc:
ib_dealloc_device((struct ib_device *)dev);
return NULL;
}
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
struct mlx5_ib_dev *dev = context;
int i;
for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
dev->port[i].port_gone = 1;
while (dev->port[i].port_gone != 2)
pause("W", hz);
}
for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
device_remove_file(&dev->ib_dev.dev,
mlx5_class_attributes[i]);
}
destroy_ports_attrs(dev, dev->num_ports);
if (MLX5_CAP_GEN(dev->mdev, port_type) ==
MLX5_CAP_PORT_TYPE_IB)
cleanup_dc_improvements(dev);
mlx5_ib_dealloc_q_counters(dev);
ib_unregister_device(&dev->ib_dev);
destroy_umrc_res(dev);
destroy_dev_resources(&dev->devr);
if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
mlx5_nic_vport_disable_roce(mdev);
kfree(dev->port);
ib_dealloc_device(&dev->ib_dev);
}
static struct mlx5_interface mlx5_ib_interface = {
.add = mlx5_ib_add,
.remove = mlx5_ib_remove,
.event = mlx5_ib_event,
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
static int __init mlx5_ib_init(void)
{
int err;
if (deprecated_prof_sel != 2)
printf("mlx5_ib: WARN: ""prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
err = mlx5_register_interface(&mlx5_ib_interface);
if (err)
goto clean_odp;
mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
if (!mlx5_ib_wq) {
printf("mlx5_ib: ERR: ""%s: failed to create mlx5_ib_wq\n", __func__);
goto err_unreg;
}
return err;
err_unreg:
mlx5_unregister_interface(&mlx5_ib_interface);
clean_odp:
return err;
}
static void __exit mlx5_ib_cleanup(void)
{
destroy_workqueue(mlx5_ib_wq);
mlx5_unregister_interface(&mlx5_ib_interface);
}
module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c
index 3e691dc206c4..cdbf1a7382fc 100644
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c
@@ -1,2995 +1,2995 @@
/*-
* Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <linux/module.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"
#include "user.h"
#include <dev/mlx5/mlx5_core/transobj.h>
#include <sys/priv.h>
#define IPV6_DEFAULT_HOPLIMIT 64
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state);
/* not supported currently */
static int workqueue_signature;
enum {
MLX5_IB_ACK_REQ_FREQ = 8,
};
enum {
MLX5_IB_DEFAULT_SCHED_QUEUE = 0x83,
MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
MLX5_IB_LINK_TYPE_IB = 0,
MLX5_IB_LINK_TYPE_ETH = 1
};
enum {
MLX5_IB_SQ_STRIDE = 6,
MLX5_IB_CACHE_LINE_SIZE = 64,
};
enum {
MLX5_RQ_NUM_STATE = MLX5_RQC_STATE_ERR + 1,
MLX5_SQ_NUM_STATE = MLX5_SQC_STATE_ERR + 1,
MLX5_QP_STATE = MLX5_QP_NUM_STATE + 1,
MLX5_QP_STATE_BAD = MLX5_QP_STATE + 1,
};
static const u32 mlx5_ib_opcode[] = {
[IB_WR_SEND] = MLX5_OPCODE_SEND,
[IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
[IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
[IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
[IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
[IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
[IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
[IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR,
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
};
struct umr_wr {
u64 virt_addr;
struct ib_pd *pd;
unsigned int page_shift;
unsigned int npages;
u32 length;
int access_flags;
u32 mkey;
};
static int is_qp0(enum ib_qp_type qp_type)
{
return qp_type == IB_QPT_SMI;
}
static int is_qp1(enum ib_qp_type qp_type)
{
return qp_type == IB_QPT_GSI;
}
static int is_sqp(enum ib_qp_type qp_type)
{
return is_qp0(qp_type) || is_qp1(qp_type);
}
static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
{
return mlx5_buf_offset(&qp->buf, offset);
}
static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
{
return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
}
void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
{
return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
}
static int
query_wqe_idx(struct mlx5_ib_qp *qp)
{
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
struct mlx5_query_qp_mbox_out *outb;
struct mlx5_qp_context *context;
int ret;
outb = kzalloc(sizeof(*outb), GFP_KERNEL);
if (!outb)
return -ENOMEM;
context = &outb->ctx;
mutex_lock(&qp->mutex);
ret = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb));
if (ret)
goto out_free;
ret = be16_to_cpu(context->hw_sq_wqe_counter) & (qp->sq.wqe_cnt - 1);
out_free:
mutex_unlock(&qp->mutex);
kfree(outb);
return ret;
}
static int mlx5_handle_sig_pipelining(struct mlx5_ib_qp *qp)
{
int wqe_idx;
wqe_idx = query_wqe_idx(qp);
if (wqe_idx < 0) {
printf("mlx5_ib: ERR: ""Failed to query QP 0x%x wqe index\n", qp->mqp.qpn);
return wqe_idx;
}
if (qp->sq.swr_ctx[wqe_idx].sig_piped) {
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
struct mlx5_wqe_ctrl_seg *cwqe;
cwqe = mlx5_get_send_wqe(qp, wqe_idx);
cwqe->opmod_idx_opcode = cpu_to_be32(be32_to_cpu(cwqe->opmod_idx_opcode) & 0xffffff00);
qp->sq.swr_ctx[wqe_idx].w_list.opcode |= MLX5_OPCODE_SIGNATURE_CANCELED;
mlx5_ib_dbg(dev, "Cancel QP 0x%x wqe_index 0x%x\n",
qp->mqp.qpn, wqe_idx);
}
return 0;
}
static void mlx5_ib_sqd_work(struct work_struct *work)
{
struct mlx5_ib_sqd *sqd;
struct mlx5_ib_qp *qp;
struct ib_qp_attr qp_attr;
sqd = container_of(work, struct mlx5_ib_sqd, work);
qp = sqd->qp;
if (mlx5_handle_sig_pipelining(qp))
goto out;
mutex_lock(&qp->mutex);
if (__mlx5_ib_modify_qp(&qp->ibqp, &qp_attr, 0, IB_QPS_SQD, IB_QPS_RTS))
printf("mlx5_ib: ERR: ""Failed to resume QP 0x%x\n", qp->mqp.qpn);
mutex_unlock(&qp->mutex);
out:
kfree(sqd);
}
static void mlx5_ib_sigerr_sqd_event(struct mlx5_ib_qp *qp)
{
struct mlx5_ib_sqd *sqd;
sqd = kzalloc(sizeof(*sqd), GFP_ATOMIC);
if (!sqd)
return;
sqd->qp = qp;
INIT_WORK(&sqd->work, mlx5_ib_sqd_work);
queue_work(mlx5_ib_wq, &sqd->work);
}
static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
{
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
struct ib_event event;
if (type == MLX5_EVENT_TYPE_SQ_DRAINED &&
to_mibqp(qp)->state != IB_QPS_SQD) {
mlx5_ib_sigerr_sqd_event(to_mibqp(qp));
return;
}
if (type == MLX5_EVENT_TYPE_PATH_MIG)
to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
if (ibqp->event_handler) {
event.device = ibqp->device;
event.element.qp = ibqp;
switch (type) {
case MLX5_EVENT_TYPE_PATH_MIG:
event.event = IB_EVENT_PATH_MIG;
break;
case MLX5_EVENT_TYPE_COMM_EST:
event.event = IB_EVENT_COMM_EST;
break;
case MLX5_EVENT_TYPE_SQ_DRAINED:
event.event = IB_EVENT_SQ_DRAINED;
break;
case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
event.event = IB_EVENT_QP_LAST_WQE_REACHED;
break;
case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
event.event = IB_EVENT_QP_FATAL;
break;
case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
event.event = IB_EVENT_PATH_MIG_ERR;
break;
case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
event.event = IB_EVENT_QP_REQ_ERR;
break;
case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
event.event = IB_EVENT_QP_ACCESS_ERR;
break;
default:
printf("mlx5_ib: WARN: ""mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
return;
}
ibqp->event_handler(&event, ibqp->qp_context);
}
}
static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
{
int wqe_size;
int wq_size;
/* Sanity check RQ size before proceeding */
if (cap->max_recv_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)))
return -EINVAL;
if (!has_rq) {
qp->rq.max_gs = 0;
qp->rq.wqe_cnt = 0;
qp->rq.wqe_shift = 0;
cap->max_recv_wr = 0;
cap->max_recv_sge = 0;
} else {
if (ucmd) {
qp->rq.wqe_cnt = ucmd->rq_wqe_count;
qp->rq.wqe_shift = ucmd->rq_wqe_shift;
qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
qp->rq.max_post = qp->rq.wqe_cnt;
} else {
wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
wqe_size = roundup_pow_of_two(wqe_size);
wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
qp->rq.wqe_cnt = wq_size / wqe_size;
if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq)) {
mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
wqe_size,
MLX5_CAP_GEN(dev->mdev,
max_wqe_sz_rq));
return -EINVAL;
}
qp->rq.wqe_shift = ilog2(wqe_size);
qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
qp->rq.max_post = qp->rq.wqe_cnt;
}
}
return 0;
}
static int sq_overhead(enum ib_qp_type qp_type)
{
int size = 0;
switch (qp_type) {
case IB_QPT_XRC_INI:
size += sizeof(struct mlx5_wqe_xrc_seg);
/* fall through */
case IB_QPT_RC:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_atomic_seg) +
sizeof(struct mlx5_wqe_raddr_seg) +
sizeof(struct mlx5_wqe_umr_ctrl_seg) +
sizeof(struct mlx5_mkey_seg);
break;
case IB_QPT_XRC_TGT:
return 0;
case IB_QPT_UC:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_raddr_seg) +
sizeof(struct mlx5_wqe_umr_ctrl_seg) +
sizeof(struct mlx5_mkey_seg);
break;
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_datagram_seg);
break;
default:
return -EINVAL;
}
return size;
}
static int calc_send_wqe(struct ib_qp_init_attr *attr)
{
int inl_size = 0;
int size;
size = sq_overhead(attr->qp_type);
if (size < 0)
return size;
if (attr->cap.max_inline_data) {
inl_size = size + sizeof(struct mlx5_wqe_inline_seg) +
attr->cap.max_inline_data;
}
size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
}
static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size)
{
int max_sge;
if (attr->qp_type == IB_QPT_RC)
max_sge = (min_t(int, wqe_size, 512) -
sizeof(struct mlx5_wqe_ctrl_seg) -
sizeof(struct mlx5_wqe_raddr_seg)) /
sizeof(struct mlx5_wqe_data_seg);
else if (attr->qp_type == IB_QPT_XRC_INI)
max_sge = (min_t(int, wqe_size, 512) -
sizeof(struct mlx5_wqe_ctrl_seg) -
sizeof(struct mlx5_wqe_xrc_seg) -
sizeof(struct mlx5_wqe_raddr_seg)) /
sizeof(struct mlx5_wqe_data_seg);
else
max_sge = (wqe_size - sq_overhead(attr->qp_type)) /
sizeof(struct mlx5_wqe_data_seg);
return min_t(int, max_sge, wqe_size - sq_overhead(attr->qp_type) /
sizeof(struct mlx5_wqe_data_seg));
}
static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
struct mlx5_ib_qp *qp)
{
int wqe_size;
int wq_size;
if (!attr->cap.max_send_wr)
return 0;
wqe_size = calc_send_wqe(attr);
mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size);
if (wqe_size < 0)
return wqe_size;
if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
mlx5_ib_warn(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
wqe_size, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
return -EINVAL;
}
qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
sizeof(struct mlx5_wqe_inline_seg);
attr->cap.max_inline_data = qp->max_inline_data;
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * (u64)wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
mlx5_ib_warn(dev, "wqe count(%d) exceeds limits(%d)\n",
qp->sq.wqe_cnt,
1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
return -ENOMEM;
}
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
qp->sq.max_gs = get_send_sge(attr, wqe_size);
if (qp->sq.max_gs < attr->cap.max_send_sge) {
mlx5_ib_warn(dev, "max sge(%d) exceeds limits(%d)\n",
qp->sq.max_gs, attr->cap.max_send_sge);
return -ENOMEM;
}
attr->cap.max_send_sge = qp->sq.max_gs;
qp->sq.max_post = wq_size / wqe_size;
attr->cap.max_send_wr = qp->sq.max_post;
return wq_size;
}
static int set_user_buf_size(struct mlx5_ib_dev *dev,
struct mlx5_ib_qp *qp,
struct mlx5_ib_create_qp *ucmd,
struct ib_qp_init_attr *attr)
{
int desc_sz = 1 << qp->sq.wqe_shift;
if (desc_sz > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
desc_sz, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
return -EINVAL;
}
if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) {
mlx5_ib_warn(dev, "sq_wqe_count %d, sq_wqe_count %d\n",
ucmd->sq_wqe_count, ucmd->sq_wqe_count);
return -EINVAL;
}
qp->sq.wqe_cnt = ucmd->sq_wqe_count;
if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
qp->sq.wqe_cnt,
1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
return -EINVAL;
}
if (attr->qp_type == IB_QPT_RAW_PACKET) {
qp->buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
qp->sq_buf_size = qp->sq.wqe_cnt << 6;
} else {
qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
(qp->sq.wqe_cnt << 6);
qp->sq_buf_size = 0;
}
return 0;
}
static int qp_has_rq(struct ib_qp_init_attr *attr)
{
if (attr->qp_type == IB_QPT_XRC_INI ||
attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
!attr->cap.max_recv_wr)
return 0;
return 1;
}
static int first_med_uuar(void)
{
return 1;
}
static int next_uuar(int n)
{
n++;
while (((n % 4) & 2))
n++;
return n;
}
static int num_med_uuar(struct mlx5_uuar_info *uuari)
{
int n;
n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
uuari->num_low_latency_uuars - 1;
return n >= 0 ? n : 0;
}
static int max_uuari(struct mlx5_uuar_info *uuari)
{
return uuari->num_uars * 4;
}
static int first_hi_uuar(struct mlx5_uuar_info *uuari)
{
int med;
int i;
int t;
med = num_med_uuar(uuari);
for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
t++;
if (t == med)
return next_uuar(i);
}
return 0;
}
static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
{
int i;
for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
if (!test_bit(i, uuari->bitmap)) {
set_bit(i, uuari->bitmap);
uuari->count[i]++;
return i;
}
}
return -ENOMEM;
}
static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
{
int minidx = first_med_uuar();
int i;
for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
if (uuari->count[i] < uuari->count[minidx])
minidx = i;
}
uuari->count[minidx]++;
return minidx;
}
static int alloc_uuar(struct mlx5_uuar_info *uuari,
enum mlx5_ib_latency_class lat)
{
int uuarn = -EINVAL;
mutex_lock(&uuari->lock);
switch (lat) {
case MLX5_IB_LATENCY_CLASS_LOW:
uuarn = 0;
uuari->count[uuarn]++;
break;
case MLX5_IB_LATENCY_CLASS_MEDIUM:
if (uuari->ver < 2)
uuarn = -ENOMEM;
else
uuarn = alloc_med_class_uuar(uuari);
break;
case MLX5_IB_LATENCY_CLASS_HIGH:
if (uuari->ver < 2)
uuarn = -ENOMEM;
else
uuarn = alloc_high_class_uuar(uuari);
break;
case MLX5_IB_LATENCY_CLASS_FAST_PATH:
uuarn = 2;
break;
}
mutex_unlock(&uuari->lock);
return uuarn;
}
static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
{
clear_bit(uuarn, uuari->bitmap);
--uuari->count[uuarn];
}
static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
{
clear_bit(uuarn, uuari->bitmap);
--uuari->count[uuarn];
}
static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
{
int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
int high_uuar = nuuars - uuari->num_low_latency_uuars;
mutex_lock(&uuari->lock);
if (uuarn == 0) {
--uuari->count[uuarn];
goto out;
}
if (uuarn < high_uuar) {
free_med_class_uuar(uuari, uuarn);
goto out;
}
free_high_class_uuar(uuari, uuarn);
out:
mutex_unlock(&uuari->lock);
}
static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
{
switch (state) {
case IB_QPS_RESET: return MLX5_QP_STATE_RST;
case IB_QPS_INIT: return MLX5_QP_STATE_INIT;
case IB_QPS_RTR: return MLX5_QP_STATE_RTR;
case IB_QPS_RTS: return MLX5_QP_STATE_RTS;
case IB_QPS_SQD: return MLX5_QP_STATE_SQD;
case IB_QPS_SQE: return MLX5_QP_STATE_SQER;
case IB_QPS_ERR: return MLX5_QP_STATE_ERR;
default: return -1;
}
}
static int to_mlx5_st(enum ib_qp_type type)
{
switch (type) {
case IB_QPT_RC: return MLX5_QP_ST_RC;
case IB_QPT_UC: return MLX5_QP_ST_UC;
case IB_QPT_UD: return MLX5_QP_ST_UD;
case IB_QPT_XRC_INI:
case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC;
case IB_QPT_SMI: return MLX5_QP_ST_QP0;
case IB_QPT_GSI: return MLX5_QP_ST_QP1;
case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
case IB_QPT_RAW_PACKET:
case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
case IB_QPT_MAX:
default: return -EINVAL;
}
}
static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
struct mlx5_ib_cq *recv_cq);
static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
struct mlx5_ib_cq *recv_cq);
static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
{
return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
}
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp, struct ib_udata *udata,
struct ib_qp_init_attr *attr,
struct mlx5_create_qp_mbox_in **in,
int *inlen,
struct mlx5_exp_ib_create_qp *ucmd)
{
struct mlx5_exp_ib_create_qp_resp resp;
struct mlx5_ib_ucontext *context;
int page_shift = 0;
int uar_index;
int npages;
u32 offset = 0;
int uuarn;
int ncont = 0;
int err;
context = to_mucontext(pd->uobject->context);
memset(&resp, 0, sizeof(resp));
resp.size_of_prefix = offsetof(struct mlx5_exp_ib_create_qp_resp, prefix_reserved);
/*
* TBD: should come from the verbs when we have the API
*/
if (ucmd->exp.comp_mask & MLX5_EXP_CREATE_QP_MASK_WC_UAR_IDX) {
if (ucmd->exp.wc_uar_index == MLX5_EXP_CREATE_QP_DB_ONLY_UUAR) {
/* Assign LATENCY_CLASS_LOW (DB only UUAR) to this QP */
uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
if (uuarn < 0) {
mlx5_ib_warn(dev, "DB only uuar allocation failed\n");
return uuarn;
}
uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
} else if (ucmd->exp.wc_uar_index >= MLX5_IB_MAX_CTX_DYNAMIC_UARS ||
context->dynamic_wc_uar_index[ucmd->exp.wc_uar_index] ==
MLX5_IB_INVALID_UAR_INDEX) {
mlx5_ib_warn(dev, "dynamic uuar allocation failed\n");
return -EINVAL;
} else {
uar_index = context->dynamic_wc_uar_index[ucmd->exp.wc_uar_index];
uuarn = MLX5_EXP_INVALID_UUAR;
}
} else {
uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
if (uuarn < 0) {
mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
mlx5_ib_dbg(dev, "reverting to medium latency\n");
uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
if (uuarn < 0) {
mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
mlx5_ib_dbg(dev, "reverting to high latency\n");
uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
if (uuarn < 0) {
mlx5_ib_warn(dev, "uuar allocation failed\n");
return uuarn;
}
}
}
uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
}
mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
qp->rq.offset = 0;
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
err = set_user_buf_size(dev, qp, (struct mlx5_ib_create_qp *)ucmd, attr);
if (err)
goto err_uuar;
if (ucmd->buf_addr && qp->buf_size) {
qp->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr,
qp->buf_size, 0, 0);
if (IS_ERR(qp->umem)) {
mlx5_ib_warn(dev, "umem_get failed\n");
err = PTR_ERR(qp->umem);
goto err_uuar;
}
} else {
qp->umem = NULL;
}
if (qp->umem) {
mlx5_ib_cont_pages(qp->umem, ucmd->buf_addr, &npages, &page_shift,
&ncont, NULL);
err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, &offset);
if (err) {
mlx5_ib_warn(dev, "bad offset\n");
goto err_umem;
}
mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
(unsigned long long)ucmd->buf_addr, qp->buf_size,
npages, page_shift, ncont, offset);
}
*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
*in = mlx5_vzalloc(*inlen);
if (!*in) {
err = -ENOMEM;
goto err_umem;
}
if (qp->umem)
mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
(*in)->ctx.log_pg_sz_remote_qpn =
cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
(*in)->ctx.params2 = cpu_to_be32(offset << 6);
(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
resp.uuar_index = uuarn;
qp->uuarn = uuarn;
err = mlx5_ib_db_map_user(context, ucmd->db_addr, &qp->db);
if (err) {
mlx5_ib_warn(dev, "map failed\n");
goto err_free;
}
err = ib_copy_to_udata(udata, &resp, sizeof(struct mlx5_ib_create_qp_resp));
if (err) {
mlx5_ib_err(dev, "copy failed\n");
goto err_unmap;
}
qp->create_type = MLX5_QP_USER;
return 0;
err_unmap:
mlx5_ib_db_unmap_user(context, &qp->db);
err_free:
kvfree(*in);
err_umem:
if (qp->umem)
ib_umem_release(qp->umem);
err_uuar:
free_uuar(&context->uuari, uuarn);
return err;
}
static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_ucontext *context;
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &qp->db);
if (qp->umem)
ib_umem_release(qp->umem);
if (qp->sq_umem)
ib_umem_release(qp->sq_umem);
/*
* Free only the UUARs handled by the kernel.
* UUARs of UARs allocated dynamically are handled by user.
*/
if (qp->uuarn != MLX5_EXP_INVALID_UUAR)
free_uuar(&context->uuari, qp->uuarn);
}
static int create_kernel_qp(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_qp *qp,
struct mlx5_create_qp_mbox_in **in, int *inlen)
{
enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
struct mlx5_uuar_info *uuari;
int uar_index;
int uuarn;
int err;
uuari = &dev->mdev->priv.uuari;
if (init_attr->create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
return -EINVAL;
uuarn = alloc_uuar(uuari, lc);
if (uuarn < 0) {
mlx5_ib_warn(dev, "\n");
return -ENOMEM;
}
qp->bf = &uuari->bfs[uuarn];
uar_index = qp->bf->uar->index;
err = calc_sq_size(dev, init_attr, qp);
if (err < 0) {
mlx5_ib_warn(dev, "err %d\n", err);
goto err_uuar;
}
qp->rq.offset = 0;
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
err = mlx5_buf_alloc(dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf);
if (err) {
mlx5_ib_warn(dev, "err %d\n", err);
goto err_uuar;
}
qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
*inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
*in = mlx5_vzalloc(*inlen);
if (!*in) {
err = -ENOMEM;
goto err_buf;
}
(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
(*in)->ctx.log_pg_sz_remote_qpn =
cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
/* Set "fast registration enabled" for all kernel QPs */
(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
mlx5_fill_page_array(&qp->buf, (*in)->pas);
err = mlx5_db_alloc(dev->mdev, &qp->db);
if (err) {
mlx5_ib_warn(dev, "err %d\n", err);
goto err_free;
}
qp->sq.swr_ctx = kcalloc(qp->sq.wqe_cnt, sizeof(*qp->sq.swr_ctx),
GFP_KERNEL);
qp->rq.rwr_ctx = kcalloc(qp->rq.wqe_cnt, sizeof(*qp->rq.rwr_ctx),
GFP_KERNEL);
if (!qp->sq.swr_ctx || !qp->rq.rwr_ctx) {
err = -ENOMEM;
goto err_wrid;
}
qp->create_type = MLX5_QP_KERNEL;
return 0;
err_wrid:
mlx5_db_free(dev->mdev, &qp->db);
kfree(qp->sq.swr_ctx);
kfree(qp->rq.rwr_ctx);
err_free:
kvfree(*in);
err_buf:
mlx5_buf_free(dev->mdev, &qp->buf);
err_uuar:
free_uuar(&dev->mdev->priv.uuari, uuarn);
return err;
}
static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
mlx5_db_free(dev->mdev, &qp->db);
kfree(qp->sq.swr_ctx);
kfree(qp->rq.rwr_ctx);
mlx5_buf_free(dev->mdev, &qp->buf);
free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn);
}
static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
{
enum ib_qp_type qt = attr->qp_type;
if (attr->srq || (qt == IB_QPT_XRC_TGT) || (qt == IB_QPT_XRC_INI))
return cpu_to_be32(MLX5_SRQ_RQ);
else if (!qp->has_rq)
return cpu_to_be32(MLX5_ZERO_LEN_RQ);
else
return cpu_to_be32(MLX5_NON_ZERO_RQ);
}
static int is_connected(enum ib_qp_type qp_type)
{
if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC)
return 1;
return 0;
}
static void get_cqs(enum ib_qp_type qp_type,
struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
{
switch (qp_type) {
case IB_QPT_XRC_TGT:
*send_cq = NULL;
*recv_cq = NULL;
break;
case IB_QPT_XRC_INI:
*send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
*recv_cq = NULL;
break;
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
case IB_QPT_RAW_PACKET:
*send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
*recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
break;
case IB_QPT_MAX:
default:
*send_cq = NULL;
*recv_cq = NULL;
break;
}
}
enum {
MLX5_QP_END_PAD_MODE_ALIGN = MLX5_WQ_END_PAD_MODE_ALIGN,
MLX5_QP_END_PAD_MODE_NONE = MLX5_WQ_END_PAD_MODE_NONE,
};
static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_resources *devr = &dev->devr;
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_create_qp_mbox_in *in = NULL;
struct mlx5_exp_ib_create_qp ucmd;
struct mlx5_ib_create_qp *pucmd = NULL;
struct mlx5_ib_cq *send_cq;
struct mlx5_ib_cq *recv_cq;
unsigned long flags;
int inlen = sizeof(*in);
size_t ucmd_size;
int err;
int st;
u32 uidx;
void *qpc;
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
if (!MLX5_CAP_GEN(mdev, block_lb_mc)) {
mlx5_ib_warn(dev, "block multicast loopback isn't supported\n");
return -EINVAL;
} else {
qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
}
}
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
if (pd && pd->uobject) {
memset(&ucmd, 0, sizeof(ucmd));
ucmd_size = sizeof(struct mlx5_ib_create_qp);
if (ucmd_size > offsetof(struct mlx5_exp_ib_create_qp, size_of_prefix)) {
mlx5_ib_warn(dev, "mlx5_ib_create_qp is too big to fit as prefix of mlx5_exp_ib_create_qp\n");
return -EINVAL;
}
err = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, ucmd_size));
if (err) {
mlx5_ib_err(dev, "copy failed\n");
return err;
}
pucmd = (struct mlx5_ib_create_qp *)&ucmd;
if (ucmd.exp.comp_mask & MLX5_EXP_CREATE_QP_MASK_UIDX)
uidx = ucmd.exp.uidx;
else
uidx = 0xffffff;
qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
} else {
qp->wq_sig = !!workqueue_signature;
uidx = 0xffffff;
}
qp->has_rq = qp_has_rq(init_attr);
err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
qp, (pd && pd->uobject) ? pucmd : NULL);
if (err) {
mlx5_ib_warn(dev, "err %d\n", err);
return err;
}
if (pd) {
if (pd->uobject) {
__u32 max_wqes =
1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
mlx5_ib_warn(dev, "invalid rq params\n");
return -EINVAL;
}
if (ucmd.sq_wqe_count > max_wqes) {
mlx5_ib_warn(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
ucmd.sq_wqe_count, max_wqes);
return -EINVAL;
}
err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
&inlen, &ucmd);
if (err)
mlx5_ib_warn(dev, "err %d\n", err);
} else {
if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
mlx5_ib_warn(dev, "Raw Eth QP is disabled for Kernel consumers\n");
return -EINVAL;
}
err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
if (err)
mlx5_ib_warn(dev, "err %d\n", err);
else
qp->pa_lkey = to_mpd(pd)->pa_lkey;
}
if (err)
return err;
} else {
in = mlx5_vzalloc(sizeof(*in));
if (!in)
return -ENOMEM;
qp->create_type = MLX5_QP_EMPTY;
}
if (is_sqp(init_attr->qp_type))
qp->port = init_attr->port_num;
st = to_mlx5_st(init_attr->qp_type);
if (st < 0) {
mlx5_ib_warn(dev, "invalid service type\n");
err = st;
goto err_create;
}
in->ctx.flags |= cpu_to_be32(st << 16 | MLX5_QP_PM_MIGRATED << 11);
in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
if (qp->wq_sig)
in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
if (qp->flags & MLX5_IB_QP_CAP_RX_END_PADDING)
in->ctx.flags |= cpu_to_be32(MLX5_QP_END_PAD_MODE_ALIGN << 2);
else
in->ctx.flags |= cpu_to_be32(MLX5_QP_END_PAD_MODE_NONE << 2);
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
int rcqe_sz;
int scqe_sz;
rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
if (rcqe_sz == 128) {
in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
} else {
in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
}
if (init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
in->ctx.cs_req = 0;
} else {
if (scqe_sz == 128)
in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
else
in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
}
}
if (qp->rq.wqe_cnt) {
in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
}
in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
if (qp->sq.wqe_cnt)
in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
else
in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
/* Set default resources */
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
break;
case IB_QPT_XRC_INI:
in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
break;
default:
if (init_attr->srq) {
in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
} else {
in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s1)->msrq.srqn);
}
}
if (init_attr->send_cq)
in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
if (init_attr->recv_cq)
in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
if (MLX5_CAP_GEN(mdev, cqe_version)) {
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
/* 0xffffff means we ask to work with cqe version 0 */
MLX5_SET(qpc, qpc, user_index, uidx);
}
if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) {
mlx5_ib_warn(dev, "Raw Ethernet QP is allowed only for Ethernet link layer\n");
return -ENOSYS;
}
if (ucmd.exp.comp_mask & MLX5_EXP_CREATE_QP_MASK_SQ_BUFF_ADD) {
qp->sq_buf_addr = ucmd.exp.sq_buf_addr;
} else {
mlx5_ib_warn(dev, "Raw Ethernet QP needs SQ buff address\n");
return -EINVAL;
}
err = -EOPNOTSUPP;
} else {
err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen);
qp->mqp.event = mlx5_ib_qp_event;
}
if (err) {
mlx5_ib_warn(dev, "create qp failed\n");
goto err_create;
}
kvfree(in);
/* Hardware wants QPN written in big-endian order (after
* shifting) for send doorbell. Precompute this value to save
* a little bit when posting sends.
*/
qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq,
&send_cq, &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx5_ib_lock_cqs(send_cq, recv_cq);
/* Maintain device to QPs access, needed for further handling via reset
* flow
*/
list_add_tail(&qp->qps_list, &dev->qp_list);
/* Maintain CQ to QPs access, needed for further handling via reset flow
*/
if (send_cq)
list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
if (recv_cq)
list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
return 0;
err_create:
if (qp->create_type == MLX5_QP_USER)
destroy_qp_user(pd, qp);
else if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
kvfree(in);
return err;
}
static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
if (send_cq) {
if (recv_cq) {
if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_lock(&send_cq->lock);
spin_lock_nested(&recv_cq->lock,
SINGLE_DEPTH_NESTING);
} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
} else {
spin_lock(&recv_cq->lock);
spin_lock_nested(&send_cq->lock,
SINGLE_DEPTH_NESTING);
}
} else {
spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
}
} else if (recv_cq) {
spin_lock(&recv_cq->lock);
__acquire(&send_cq->lock);
} else {
__acquire(&send_cq->lock);
__acquire(&recv_cq->lock);
}
}
static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
__releases(&send_cq->lock) __releases(&recv_cq->lock)
{
if (send_cq) {
if (recv_cq) {
if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
spin_unlock(&send_cq->lock);
} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
__release(&recv_cq->lock);
spin_unlock(&send_cq->lock);
} else {
spin_unlock(&send_cq->lock);
spin_unlock(&recv_cq->lock);
}
} else {
__release(&recv_cq->lock);
spin_unlock(&send_cq->lock);
}
} else if (recv_cq) {
__release(&send_cq->lock);
spin_unlock(&recv_cq->lock);
} else {
__release(&recv_cq->lock);
__release(&send_cq->lock);
}
}
static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
{
return to_mpd(qp->ibqp.pd);
}
static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_modify_qp_mbox_in *in;
unsigned long flags;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return;
if (qp->state != IB_QPS_RESET) {
if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
if (mlx5_core_qp_modify(dev->mdev, MLX5_CMD_OP_2RST_QP, in, 0,
&qp->mqp))
mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
qp->mqp.qpn);
}
}
get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
&send_cq, &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx5_ib_lock_cqs(send_cq, recv_cq);
/* del from lists under both locks above to protect reset flow paths */
list_del(&qp->qps_list);
if (send_cq)
list_del(&qp->cq_send_list);
if (recv_cq)
list_del(&qp->cq_recv_list);
if (qp->create_type == MLX5_QP_KERNEL) {
__mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
__mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
}
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
} else {
err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp);
if (err)
mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
qp->mqp.qpn);
}
kfree(in);
if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
else if (qp->create_type == MLX5_QP_USER)
destroy_qp_user(&get_pd(qp)->ibpd, qp);
}
static const char *ib_qp_type_str(enum ib_qp_type type)
{
switch (type) {
case IB_QPT_SMI:
return "IB_QPT_SMI";
case IB_QPT_GSI:
return "IB_QPT_GSI";
case IB_QPT_RC:
return "IB_QPT_RC";
case IB_QPT_UC:
return "IB_QPT_UC";
case IB_QPT_UD:
return "IB_QPT_UD";
case IB_QPT_RAW_IPV6:
return "IB_QPT_RAW_IPV6";
case IB_QPT_RAW_ETHERTYPE:
return "IB_QPT_RAW_ETHERTYPE";
case IB_QPT_XRC_INI:
return "IB_QPT_XRC_INI";
case IB_QPT_XRC_TGT:
return "IB_QPT_XRC_TGT";
case IB_QPT_RAW_PACKET:
return "IB_QPT_RAW_PACKET";
case IB_QPT_MAX:
default:
return "Invalid QP type";
}
}
struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev;
struct mlx5_ib_qp *qp;
u16 xrcdn = 0;
int err;
u32 rcqn;
u32 scqn;
init_attr->qpg_type = IB_QPG_NONE;
if (pd) {
dev = to_mdev(pd->device);
} else {
/* being cautious here */
if (init_attr->qp_type != IB_QPT_XRC_TGT) {
printf("mlx5_ib: WARN: ""%s: no PD for transport %s\n", __func__, ib_qp_type_str(init_attr->qp_type));
return ERR_PTR(-EINVAL);
}
dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
}
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
case IB_QPT_XRC_INI:
if (!MLX5_CAP_GEN(dev->mdev, xrc)) {
mlx5_ib_warn(dev, "XRC not supported\n");
return ERR_PTR(-ENOSYS);
}
init_attr->recv_cq = NULL;
if (init_attr->qp_type == IB_QPT_XRC_TGT) {
xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
init_attr->send_cq = NULL;
}
/* fall through */
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_RAW_ETHERTYPE:
case IB_QPT_RAW_PACKET:
qp = kzalloc(sizeof(*qp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
err = create_qp_common(dev, pd, init_attr, udata, qp);
if (err) {
mlx5_ib_warn(dev, "create_qp_common failed\n");
kfree(qp);
return ERR_PTR(err);
}
if (is_qp0(init_attr->qp_type))
qp->ibqp.qp_num = 0;
else if (is_qp1(init_attr->qp_type))
qp->ibqp.qp_num = 1;
else
qp->ibqp.qp_num = qp->mqp.qpn;
rcqn = init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1;
scqn = init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1;
mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
qp->ibqp.qp_num, qp->mqp.qpn, rcqn, scqn);
qp->xrcdn = xrcdn;
break;
case IB_QPT_RAW_IPV6:
case IB_QPT_MAX:
default:
mlx5_ib_warn(dev, "unsupported qp type %d\n",
init_attr->qp_type);
/* Don't support raw QPs */
return ERR_PTR(-EINVAL);
}
return &qp->ibqp;
}
int mlx5_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_qp *mqp = to_mqp(qp);
destroy_qp_common(dev, mqp);
kfree(mqp);
return 0;
}
static u32 atomic_mode_qp(struct mlx5_ib_dev *dev)
{
unsigned long mask;
unsigned long tmp;
mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp) &
MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
tmp = find_last_bit(&mask, BITS_PER_LONG);
if (tmp < 2 || tmp >= BITS_PER_LONG)
return MLX5_ATOMIC_MODE_NONE;
if (tmp == 2)
return MLX5_ATOMIC_MODE_CX;
return tmp << MLX5_ATOMIC_MODE_OFF;
}
static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
int attr_mask)
{
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
u32 hw_access_flags = 0;
u8 dest_rd_atomic;
u32 access_flags;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
dest_rd_atomic = attr->max_dest_rd_atomic;
else
dest_rd_atomic = qp->resp_depth;
if (attr_mask & IB_QP_ACCESS_FLAGS)
access_flags = attr->qp_access_flags;
else
access_flags = qp->atomic_rd_en;
if (!dest_rd_atomic)
access_flags &= IB_ACCESS_REMOTE_WRITE;
if (access_flags & IB_ACCESS_REMOTE_READ)
hw_access_flags |= MLX5_QP_BIT_RRE;
if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
hw_access_flags |= (MLX5_QP_BIT_RAE |
atomic_mode_qp(dev));
if (access_flags & IB_ACCESS_REMOTE_WRITE)
hw_access_flags |= MLX5_QP_BIT_RWE;
return cpu_to_be32(hw_access_flags);
}
enum {
MLX5_PATH_FLAG_FL = 1 << 0,
MLX5_PATH_FLAG_FREE_AR = 1 << 1,
MLX5_PATH_FLAG_COUNTER = 1 << 2,
};
static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
{
if (rate == IB_RATE_PORT_CURRENT) {
return 0;
} else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
return -EINVAL;
} else {
while (rate != IB_RATE_2_5_GBPS &&
!(1 << (rate + MLX5_STAT_RATE_OFFSET) &
MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
--rate;
}
return rate + MLX5_STAT_RATE_OFFSET;
}
static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
struct mlx5_qp_path *path, u8 port, int attr_mask,
u32 path_flags, const struct ib_qp_attr *attr,
int alt)
{
enum rdma_link_layer ll = dev->ib_dev.get_link_layer(&dev->ib_dev,
port);
int err;
int gid_type;
if ((ll == IB_LINK_LAYER_ETHERNET) || (ah->ah_flags & IB_AH_GRH)) {
- int len = dev->ib_dev.gid_tbl_len[port - 1];
+ int len = dev->mdev->port_caps[port - 1].gid_table_len;
if (ah->grh.sgid_index >= len) {
printf("mlx5_ib: ERR: ""sgid_index (%u) too large. max is %d\n", ah->grh.sgid_index, len - 1);
return -EINVAL;
}
}
if (ll == IB_LINK_LAYER_ETHERNET) {
if (!(ah->ah_flags & IB_AH_GRH))
return -EINVAL;
err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index,
&gid_type);
if (err)
return err;
memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
ah->grh.sgid_index,
0);
path->dci_cfi_prio_sl = (ah->sl & 0xf) << 4;
} else {
path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
path->grh_mlid = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
if (ah->ah_flags & IB_AH_GRH)
path->grh_mlid |= 1 << 7;
if (attr_mask & IB_QP_PKEY_INDEX)
path->pkey_index = cpu_to_be16(alt ?
attr->alt_pkey_index :
attr->pkey_index);
path->dci_cfi_prio_sl = ah->sl & 0xf;
}
path->fl_free_ar |= (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0;
if (ah->ah_flags & IB_AH_GRH) {
path->mgid_index = ah->grh.sgid_index;
path->hop_limit = ah->grh.hop_limit;
path->tclass_flowlabel =
cpu_to_be32((ah->grh.traffic_class << 20) |
(ah->grh.flow_label));
memcpy(path->rgid, ah->grh.dgid.raw, 16);
}
err = ib_rate_to_mlx5(dev, ah->static_rate);
if (err < 0)
return err;
path->static_rate = err;
path->port = port;
if (attr_mask & IB_QP_TIMEOUT)
path->ackto_lt = alt ? attr->alt_timeout << 3 : attr->timeout << 3;
return 0;
}
static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
[MLX5_QP_STATE_INIT] = {
[MLX5_QP_STATE_INIT] = {
[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_PRI_PORT,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_PRI_PORT,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY |
MLX5_QP_OPTPAR_PRI_PORT,
[MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_PRI_PORT |
MLX5_QP_OPTPAR_DC_KEY |
MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_RAE,
},
[MLX5_QP_STATE_RTR] = {
[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX,
[MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_DC_KEY,
},
},
[MLX5_QP_STATE_RTR] = {
[MLX5_QP_STATE_RTS] = {
[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PM_STATE |
MLX5_QP_OPTPAR_RNR_TIMEOUT,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PM_STATE,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_DC_KEY |
MLX5_QP_OPTPAR_PM_STATE |
MLX5_QP_OPTPAR_RAE,
},
},
[MLX5_QP_STATE_RTS] = {
[MLX5_QP_STATE_RTS] = {
[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_RNR_TIMEOUT |
MLX5_QP_OPTPAR_PM_STATE |
MLX5_QP_OPTPAR_ALT_ADDR_PATH,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PM_STATE |
MLX5_QP_OPTPAR_ALT_ADDR_PATH,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY |
MLX5_QP_OPTPAR_SRQN |
MLX5_QP_OPTPAR_CQN_RCV,
[MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_DC_KEY |
MLX5_QP_OPTPAR_PM_STATE |
MLX5_QP_OPTPAR_RAE,
},
},
[MLX5_QP_STATE_SQER] = {
[MLX5_QP_STATE_RTS] = {
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE,
[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RRE,
[MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_DC_KEY |
MLX5_QP_OPTPAR_RAE,
},
},
[MLX5_QP_STATE_SQD] = {
[MLX5_QP_STATE_RTS] = {
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE,
[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RRE,
},
},
};
static int ib_nr_to_mlx5_nr(int ib_mask)
{
switch (ib_mask) {
case IB_QP_STATE:
return 0;
case IB_QP_CUR_STATE:
return 0;
case IB_QP_EN_SQD_ASYNC_NOTIFY:
return 0;
case IB_QP_ACCESS_FLAGS:
return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE;
case IB_QP_PKEY_INDEX:
return MLX5_QP_OPTPAR_PKEY_INDEX;
case IB_QP_PORT:
return MLX5_QP_OPTPAR_PRI_PORT;
case IB_QP_QKEY:
return MLX5_QP_OPTPAR_Q_KEY;
case IB_QP_AV:
return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
MLX5_QP_OPTPAR_PRI_PORT;
case IB_QP_PATH_MTU:
return 0;
case IB_QP_TIMEOUT:
return MLX5_QP_OPTPAR_ACK_TIMEOUT;
case IB_QP_RETRY_CNT:
return MLX5_QP_OPTPAR_RETRY_COUNT;
case IB_QP_RNR_RETRY:
return MLX5_QP_OPTPAR_RNR_RETRY;
case IB_QP_RQ_PSN:
return 0;
case IB_QP_MAX_QP_RD_ATOMIC:
return MLX5_QP_OPTPAR_SRA_MAX;
case IB_QP_ALT_PATH:
return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
case IB_QP_MIN_RNR_TIMER:
return MLX5_QP_OPTPAR_RNR_TIMEOUT;
case IB_QP_SQ_PSN:
return 0;
case IB_QP_MAX_DEST_RD_ATOMIC:
return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
case IB_QP_PATH_MIG_STATE:
return MLX5_QP_OPTPAR_PM_STATE;
case IB_QP_CAP:
return 0;
case IB_QP_DEST_QPN:
return 0;
}
return 0;
}
static int ib_mask_to_mlx5_opt(int ib_mask)
{
int result = 0;
int i;
for (i = 0; i < 8 * sizeof(int); i++) {
if ((1 << i) & ib_mask)
result |= ib_nr_to_mlx5_nr(1 << i);
}
return result;
}
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
[MLX5_QP_STATE_RST] = {
[MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
[MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
[MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP,
},
[MLX5_QP_STATE_INIT] = {
[MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
[MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
[MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP,
[MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
},
[MLX5_QP_STATE_RTR] = {
[MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
[MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
[MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
},
[MLX5_QP_STATE_RTS] = {
[MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
[MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
[MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
},
[MLX5_QP_STATE_SQD] = {
[MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
[MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
[MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQD_RTS_QP,
},
[MLX5_QP_STATE_SQER] = {
[MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
[MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
[MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
},
[MLX5_QP_STATE_ERR] = {
[MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
[MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
}
};
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_qp_context *context;
struct mlx5_modify_qp_mbox_in *in;
struct mlx5_ib_pd *pd;
enum mlx5_qp_state mlx5_cur, mlx5_new;
enum mlx5_qp_optpar optpar;
int sqd_event;
int mlx5_st;
int err;
u16 op;
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return -ENOMEM;
context = &in->ctx;
err = to_mlx5_st(ibqp->qp_type);
if (err < 0)
goto out;
context->flags = cpu_to_be32(err << 16);
if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
} else {
switch (attr->path_mig_state) {
case IB_MIG_MIGRATED:
context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
break;
case IB_MIG_REARM:
context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11);
break;
case IB_MIG_ARMED:
context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11);
break;
}
}
if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
} else if (ibqp->qp_type == IB_QPT_UD) {
context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
} else if (attr_mask & IB_QP_PATH_MTU) {
if (attr->path_mtu < IB_MTU_256 ||
attr->path_mtu > IB_MTU_4096) {
mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
err = -EINVAL;
goto out;
}
context->mtu_msgmax = (attr->path_mtu << 5) |
(u8)MLX5_CAP_GEN(dev->mdev, log_max_msg);
}
if (attr_mask & IB_QP_DEST_QPN)
context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
if (attr_mask & IB_QP_PKEY_INDEX)
context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index);
/* todo implement counter_index functionality */
if (is_sqp(ibqp->qp_type))
context->pri_path.port = qp->port;
if (attr_mask & IB_QP_PORT)
context->pri_path.port = attr->port_num;
if (attr_mask & IB_QP_AV) {
err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
attr_mask, 0, attr, 0);
if (err)
goto out;
}
if (attr_mask & IB_QP_TIMEOUT)
context->pri_path.ackto_lt |= attr->timeout << 3;
if (attr_mask & IB_QP_ALT_PATH) {
err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
attr->alt_port_num,
attr_mask | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT,
0, attr, 1);
if (err)
goto out;
}
pd = get_pd(qp);
get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
&send_cq, &recv_cq);
context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0;
context->params1 = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28);
if (attr_mask & IB_QP_RNR_RETRY)
context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
if (attr_mask & IB_QP_RETRY_CNT)
context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
if (attr->max_rd_atomic)
context->params1 |=
cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
}
if (attr_mask & IB_QP_SQ_PSN)
context->next_send_psn = cpu_to_be32(attr->sq_psn & 0xffffff);
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
if (attr->max_dest_rd_atomic)
context->params2 |=
cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
}
if ((attr_mask & IB_QP_ACCESS_FLAGS) &&
(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
!dev->enable_atomic_resp) {
mlx5_ib_warn(dev, "atomic responder is not supported\n");
err = -EINVAL;
goto out;
}
if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
if (attr_mask & IB_QP_MIN_RNR_TIMER)
context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
if (attr_mask & IB_QP_RQ_PSN)
context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn & 0xffffff);
if (attr_mask & IB_QP_QKEY)
context->qkey = cpu_to_be32(attr->qkey);
if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->db_rec_addr = cpu_to_be64(qp->db.dma);
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
sqd_event = 1;
else
sqd_event = 0;
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->sq_crq_size |= cpu_to_be16(1 << 4);
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
qp->port) - 1;
struct mlx5_ib_port *mibport = &dev->port[port_num];
context->qp_counter_set_usr_page |=
cpu_to_be32(mibport->q_cnt_id << 24);
}
mlx5_cur = to_mlx5_state(cur_state);
mlx5_new = to_mlx5_state(new_state);
mlx5_st = to_mlx5_st(ibqp->qp_type);
if (mlx5_st < 0)
goto out;
if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
!optab[mlx5_cur][mlx5_new])
return -EINVAL;
op = optab[mlx5_cur][mlx5_new];
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
in->optparam = cpu_to_be32(optpar);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
err = -EOPNOTSUPP;
else
err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
&qp->mqp);
if (err)
goto out;
qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS)
qp->atomic_rd_en = attr->qp_access_flags;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
qp->resp_depth = attr->max_dest_rd_atomic;
if (attr_mask & IB_QP_PORT)
qp->port = attr->port_num;
if (attr_mask & IB_QP_ALT_PATH)
qp->alt_port = attr->alt_port_num;
/*
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET && !ibqp->uobject) {
mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq) : NULL);
if (send_cq != recv_cq)
mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
qp->rq.head = 0;
qp->rq.tail = 0;
qp->sq.head = 0;
qp->sq.tail = 0;
qp->sq.cur_post = 0;
qp->sq.last_poll = 0;
if (qp->db.db) {
qp->db.db[MLX5_RCV_DBR] = 0;
qp->db.db[MLX5_SND_DBR] = 0;
}
}
out:
kfree(in);
return err;
}
static int ignored_ts_check(enum ib_qp_type qp_type)
{
return 0;
}
int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
int port;
enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
}
if (!ignored_ts_check(ibqp->qp_type) &&
!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, ll))
goto out;
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 ||
attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)))
goto out;
if (attr_mask & IB_QP_PKEY_INDEX) {
port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
if (attr->pkey_index >=
dev->mdev->port_caps[port - 1].pkey_table_len)
goto out;
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic >
(1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp)))
goto out;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic >
(1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp)))
goto out;
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
err = 0;
goto out;
}
err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
out:
mutex_unlock(&qp->mutex);
return err;
}
static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
{
struct mlx5_ib_cq *cq;
unsigned cur;
cur = wq->head - wq->tail;
if (likely(cur + nreq < wq->max_post))
return 0;
cq = to_mcq(ib_cq);
spin_lock(&cq->lock);
cur = wq->head - wq->tail;
spin_unlock(&cq->lock);
return cur + nreq >= wq->max_post;
}
static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
u64 remote_addr, u32 rkey)
{
rseg->raddr = cpu_to_be64(remote_addr);
rseg->rkey = cpu_to_be32(rkey);
rseg->reserved = 0;
}
static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
struct ib_send_wr *wr)
{
memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av));
dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
}
static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
{
dseg->byte_count = cpu_to_be32(sg->length);
dseg->lkey = cpu_to_be32(sg->lkey);
dseg->addr = cpu_to_be64(sg->addr);
}
static __be16 get_klm_octo(int npages)
{
return cpu_to_be16(ALIGN(npages, 8) / 2);
}
static __be64 frwr_mkey_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
MLX5_MKEY_MASK_START_ADDR |
MLX5_MKEY_MASK_EN_RINVAL |
MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_LR |
MLX5_MKEY_MASK_LW |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW |
MLX5_MKEY_MASK_A |
MLX5_MKEY_MASK_SMALL_FENCE |
MLX5_MKEY_MASK_FREE;
return cpu_to_be64(result);
}
static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
struct ib_send_wr *wr, int li)
{
memset(umr, 0, sizeof(*umr));
if (li) {
umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
umr->flags = 1 << 7;
return;
}
umr->flags = (1 << 5); /* fail if not free */
umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len);
umr->mkey_mask = frwr_mkey_mask();
}
static u8 get_umr_flags(int acc)
{
return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
(acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
(acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
}
static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
int li, int *writ)
{
memset(seg, 0, sizeof(*seg));
if (li) {
seg->status = MLX5_MKEY_STATUS_FREE;
return;
}
seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) |
MLX5_ACCESS_MODE_MTT;
*writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
seg->len = cpu_to_be64(wr->wr.fast_reg.length);
seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2);
seg->log2_page_size = wr->wr.fast_reg.page_shift;
}
static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
struct ib_send_wr *wr,
struct mlx5_core_dev *mdev,
struct mlx5_ib_pd *pd,
int writ)
{
struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
u64 *page_list = wr->wr.fast_reg.page_list->page_list;
u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
int i;
for (i = 0; i < wr->wr.fast_reg.page_list_len; i++)
mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
dseg->addr = cpu_to_be64(mfrpl->map);
dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
dseg->lkey = cpu_to_be32(pd->pa_lkey);
}
static __be32 send_ieth(struct ib_send_wr *wr)
{
switch (wr->opcode) {
case IB_WR_SEND_WITH_IMM:
case IB_WR_RDMA_WRITE_WITH_IMM:
return wr->ex.imm_data;
case IB_WR_SEND_WITH_INV:
return cpu_to_be32(wr->ex.invalidate_rkey);
default:
return 0;
}
}
static u8 calc_sig(void *wqe, int size)
{
u8 *p = wqe;
u8 res = 0;
int i;
for (i = 0; i < size; i++)
res ^= p[i];
return ~res;
}
static u8 calc_wq_sig(void *wqe)
{
return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
}
static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
void *wqe, int *sz)
{
struct mlx5_wqe_inline_seg *seg;
void *qend = qp->sq.qend;
void *addr;
int inl = 0;
int copy;
int len;
int i;
seg = wqe;
wqe += sizeof(*seg);
for (i = 0; i < wr->num_sge; i++) {
addr = (void *)(uintptr_t)(wr->sg_list[i].addr);
len = wr->sg_list[i].length;
inl += len;
if (unlikely(inl > qp->max_inline_data))
return -ENOMEM;
if (unlikely(wqe + len > qend)) {
copy = (int)(qend - wqe);
memcpy(wqe, addr, copy);
addr += copy;
len -= copy;
wqe = mlx5_get_send_wqe(qp, 0);
}
memcpy(wqe, addr, len);
wqe += len;
}
seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
*sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
return 0;
}
static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
{
int writ = 0;
int li;
li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0;
if (unlikely(wr->send_flags & IB_SEND_INLINE))
return -EINVAL;
set_frwr_umr_segment(*seg, wr, li);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
set_mkey_segment(*seg, wr, li, &writ);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
if (!li) {
if (unlikely(wr->wr.fast_reg.page_list_len >
wr->wr.fast_reg.page_list->max_page_list_len))
return -ENOMEM;
set_frwr_pages(*seg, wr, mdev, pd, writ);
*seg += sizeof(struct mlx5_wqe_data_seg);
*size += (sizeof(struct mlx5_wqe_data_seg) / 16);
}
return 0;
}
static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
{
__be32 *p = NULL;
int tidx = idx;
int i, j;
pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
if ((i & 0xf) == 0) {
void *buf = mlx5_get_send_wqe(qp, tidx);
tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
p = buf;
j = 0;
}
pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
be32_to_cpu(p[j + 3]));
}
}
static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
unsigned bytecnt, struct mlx5_ib_qp *qp)
{
while (bytecnt > 0) {
__iowrite64_copy(dst++, src++, 8);
__iowrite64_copy(dst++, src++, 8);
__iowrite64_copy(dst++, src++, 8);
__iowrite64_copy(dst++, src++, 8);
__iowrite64_copy(dst++, src++, 8);
__iowrite64_copy(dst++, src++, 8);
__iowrite64_copy(dst++, src++, 8);
__iowrite64_copy(dst++, src++, 8);
bytecnt -= 64;
if (unlikely(src == qp->sq.qend))
src = mlx5_get_send_wqe(qp, 0);
}
}
static u8 get_fence(u8 fence, struct ib_send_wr *wr)
{
if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
wr->send_flags & IB_SEND_FENCE))
return MLX5_FENCE_MODE_STRONG_ORDERING;
if (unlikely(fence)) {
if (wr->send_flags & IB_SEND_FENCE)
return MLX5_FENCE_MODE_SMALL_AND_FENCE;
else
return fence;
} else {
return 0;
}
}
static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl,
struct ib_send_wr *wr, unsigned *idx,
int *size, int nreq)
{
int err = 0;
if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
mlx5_ib_warn(to_mdev(qp->ibqp.device), "work queue overflow\n");
err = -ENOMEM;
return err;
}
*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
*seg = mlx5_get_send_wqe(qp, *idx);
*ctrl = *seg;
*(u32 *)(*seg + 8) = 0;
(*ctrl)->imm = send_ieth(wr);
(*ctrl)->fm_ce_se = qp->sq_signal_bits |
(wr->send_flags & IB_SEND_SIGNALED ?
MLX5_WQE_CTRL_CQ_UPDATE : 0) |
(wr->send_flags & IB_SEND_SOLICITED ?
MLX5_WQE_CTRL_SOLICITED : 0);
*seg += sizeof(**ctrl);
*size = sizeof(**ctrl) / 16;
return err;
}
static void finish_wqe(struct mlx5_ib_qp *qp,
struct mlx5_wqe_ctrl_seg *ctrl,
u8 size, unsigned idx,
struct ib_send_wr *wr,
int nreq, u8 fence, u8 next_fence,
u32 mlx5_opcode)
{
u8 opmod = 0;
ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
mlx5_opcode | ((u32)opmod << 24));
ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
ctrl->fm_ce_se |= fence;
qp->fm_cache = next_fence;
if (unlikely(qp->wq_sig))
ctrl->signature = calc_wq_sig(ctrl);
qp->sq.swr_ctx[idx].wrid = wr->wr_id;
qp->sq.swr_ctx[idx].w_list.opcode = mlx5_opcode;
qp->sq.swr_ctx[idx].wqe_head = qp->sq.head + nreq;
qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
qp->sq.swr_ctx[idx].w_list.next = qp->sq.cur_post;
qp->sq.swr_ctx[idx].sig_piped = 0;
}
int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_data_seg *dpseg;
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf = qp->bf;
int uninitialized_var(size);
void *qend = qp->sq.qend;
unsigned long flags;
unsigned idx;
int err = 0;
int inl = 0;
int num_sge;
void *seg;
int nreq;
int i;
u8 next_fence = 0;
u8 fence;
spin_lock_irqsave(&qp->sq.lock, flags);
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
goto out;
}
for (nreq = 0; wr; nreq++, wr = wr->next) {
if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
mlx5_ib_warn(dev, "Invalid opcode 0x%x\n", wr->opcode);
err = -EINVAL;
*bad_wr = wr;
goto out;
}
fence = qp->fm_cache;
num_sge = wr->num_sge;
if (unlikely(num_sge > qp->sq.max_gs)) {
mlx5_ib_warn(dev, "Max gs exceeded %d (max = %d)\n", wr->num_sge, qp->sq.max_gs);
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
if (err) {
mlx5_ib_warn(dev, "Failed to prepare WQE\n");
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
switch (ibqp->qp_type) {
case IB_QPT_XRC_INI:
xrc = seg;
xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num);
seg += sizeof(*xrc);
size += sizeof(*xrc) / 16;
/* fall through */
case IB_QPT_RC:
switch (wr->opcode) {
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
set_raddr_seg(seg, wr->wr.rdma.remote_addr,
wr->wr.rdma.rkey);
seg += sizeof(struct mlx5_wqe_raddr_seg);
size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
err = -ENOSYS;
*bad_wr = wr;
goto out;
case IB_WR_LOCAL_INV:
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
qp->sq.swr_ctx[idx].wr_data = IB_WR_LOCAL_INV;
ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
if (err) {
mlx5_ib_warn(dev, "Failed to prepare LOCAL_INV WQE\n");
*bad_wr = wr;
goto out;
}
num_sge = 0;
break;
case IB_WR_FAST_REG_MR:
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
qp->sq.swr_ctx[idx].wr_data = IB_WR_FAST_REG_MR;
ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
if (err) {
mlx5_ib_warn(dev, "Failed to prepare FAST_REG_MR WQE\n");
*bad_wr = wr;
goto out;
}
num_sge = 0;
break;
default:
break;
}
break;
case IB_QPT_UC:
switch (wr->opcode) {
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
set_raddr_seg(seg, wr->wr.rdma.remote_addr,
wr->wr.rdma.rkey);
seg += sizeof(struct mlx5_wqe_raddr_seg);
size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
break;
default:
break;
}
break;
case IB_QPT_SMI:
if (!mlx5_core_is_pf(mdev)) {
err = -EINVAL;
mlx5_ib_warn(dev, "Only physical function is allowed to send SMP MADs\n");
*bad_wr = wr;
goto out;
}
case IB_QPT_GSI:
case IB_QPT_UD:
set_datagram_seg(seg, wr);
seg += sizeof(struct mlx5_wqe_datagram_seg);
size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
if (unlikely((seg == qend)))
seg = mlx5_get_send_wqe(qp, 0);
break;
default:
break;
}
if (wr->send_flags & IB_SEND_INLINE && num_sge) {
int uninitialized_var(sz);
err = set_data_inl_seg(qp, wr, seg, &sz);
if (unlikely(err)) {
mlx5_ib_warn(dev, "Failed to prepare inline data segment\n");
*bad_wr = wr;
goto out;
}
inl = 1;
size += sz;
} else {
dpseg = seg;
for (i = 0; i < num_sge; i++) {
if (unlikely(dpseg == qend)) {
seg = mlx5_get_send_wqe(qp, 0);
dpseg = seg;
}
if (likely(wr->sg_list[i].length)) {
set_data_ptr_seg(dpseg, wr->sg_list + i);
size += sizeof(struct mlx5_wqe_data_seg) / 16;
dpseg++;
}
}
}
finish_wqe(qp, ctrl, size, idx, wr, nreq,
get_fence(fence, wr), next_fence,
mlx5_ib_opcode[wr->opcode]);
if (0)
dump_wqe(qp, idx, size);
}
out:
if (likely(nreq)) {
qp->sq.head += nreq;
/* Make sure that descriptors are written before
* updating doorbell record and ringing the doorbell
*/
wmb();
qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
/* Make sure doorbell record is visible to the HCA before
* we hit doorbell */
wmb();
if (bf->need_lock)
spin_lock(&bf->lock);
else
__acquire(&bf->lock);
/* TBD enable WC */
if (BF_ENABLE && nreq == 1 && bf->uuarn && inl && size > 1 &&
size <= bf->buf_size / 16) {
mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
/* wc_wmb(); */
} else {
mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
MLX5_GET_DOORBELL_LOCK(&bf->lock32));
/* Make sure doorbells don't leak out of SQ spinlock
* and reach the HCA out of order.
*/
mmiowb();
}
bf->offset ^= bf->buf_size;
if (bf->need_lock)
spin_unlock(&bf->lock);
else
__release(&bf->lock);
}
spin_unlock_irqrestore(&qp->sq.lock, flags);
return err;
}
static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
{
sig->signature = calc_sig(sig, size);
}
int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_data_seg *scat;
struct mlx5_rwqe_sig *sig;
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = dev->mdev;
unsigned long flags;
int err = 0;
int nreq;
int ind;
int i;
spin_lock_irqsave(&qp->rq.lock, flags);
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
goto out;
}
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; nreq++, wr = wr->next) {
if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
if (unlikely(wr->num_sge > qp->rq.max_gs)) {
err = -EINVAL;
*bad_wr = wr;
goto out;
}
scat = get_recv_wqe(qp, ind);
if (qp->wq_sig)
scat++;
for (i = 0; i < wr->num_sge; i++)
set_data_ptr_seg(scat + i, wr->sg_list + i);
if (i < qp->rq.max_gs) {
scat[i].byte_count = 0;
scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
scat[i].addr = 0;
}
if (qp->wq_sig) {
sig = (struct mlx5_rwqe_sig *)scat;
set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
}
qp->rq.rwr_ctx[ind].wrid = wr->wr_id;
ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
}
out:
if (likely(nreq)) {
qp->rq.head += nreq;
/* Make sure that descriptors are written before
* doorbell record.
*/
wmb();
*qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
}
spin_unlock_irqrestore(&qp->rq.lock, flags);
return err;
}
static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
{
switch (mlx5_state) {
case MLX5_QP_STATE_RST: return IB_QPS_RESET;
case MLX5_QP_STATE_INIT: return IB_QPS_INIT;
case MLX5_QP_STATE_RTR: return IB_QPS_RTR;
case MLX5_QP_STATE_RTS: return IB_QPS_RTS;
case MLX5_QP_STATE_SQ_DRAINING:
case MLX5_QP_STATE_SQD: return IB_QPS_SQD;
case MLX5_QP_STATE_SQER: return IB_QPS_SQE;
case MLX5_QP_STATE_ERR: return IB_QPS_ERR;
default: return -1;
}
}
static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
{
switch (mlx5_mig_state) {
case MLX5_QP_PM_ARMED: return IB_MIG_ARMED;
case MLX5_QP_PM_REARM: return IB_MIG_REARM;
case MLX5_QP_PM_MIGRATED: return IB_MIG_MIGRATED;
default: return -1;
}
}
static int to_ib_qp_access_flags(int mlx5_flags)
{
int ib_flags = 0;
if (mlx5_flags & MLX5_QP_BIT_RRE)
ib_flags |= IB_ACCESS_REMOTE_READ;
if (mlx5_flags & MLX5_QP_BIT_RWE)
ib_flags |= IB_ACCESS_REMOTE_WRITE;
if (mlx5_flags & MLX5_QP_BIT_RAE)
ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
return ib_flags;
}
static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
struct mlx5_qp_path *path)
{
struct mlx5_core_dev *dev = ibdev->mdev;
memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
ib_ah_attr->port_num = path->port;
if (ib_ah_attr->port_num == 0 ||
ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
return;
ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
ib_ah_attr->dlid = be16_to_cpu(path->rlid);
ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
ib_ah_attr->ah_flags = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
if (ib_ah_attr->ah_flags) {
ib_ah_attr->grh.sgid_index = path->mgid_index;
ib_ah_attr->grh.hop_limit = path->hop_limit;
ib_ah_attr->grh.traffic_class =
(be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
ib_ah_attr->grh.flow_label =
be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
memcpy(ib_ah_attr->grh.dgid.raw,
path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
}
}
int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_query_qp_mbox_out *outb;
struct mlx5_qp_context *context;
int mlx5_state;
int err = 0;
mutex_lock(&qp->mutex);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
err = -EOPNOTSUPP;
goto out;
} else {
outb = kzalloc(sizeof(*outb), GFP_KERNEL);
if (!outb) {
err = -ENOMEM;
goto out;
}
context = &outb->ctx;
err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb,
sizeof(*outb));
if (err) {
kfree(outb);
goto out;
}
mlx5_state = be32_to_cpu(context->flags) >> 28;
qp->state = to_ib_qp_state(mlx5_state);
qp_attr->path_mtu = context->mtu_msgmax >> 5;
qp_attr->path_mig_state =
to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
qp_attr->qkey = be32_to_cpu(context->qkey);
qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff;
qp_attr->dest_qp_num = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff;
qp_attr->qp_access_flags =
to_ib_qp_access_flags(be32_to_cpu(context->params2));
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
qp_attr->alt_pkey_index = be16_to_cpu(context->alt_path.pkey_index);
qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
}
qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index);
qp_attr->port_num = context->pri_path.port;
/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING;
qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
qp_attr->max_dest_rd_atomic =
1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
qp_attr->min_rnr_timer =
(be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
qp_attr->timeout = context->pri_path.ackto_lt >> 3;
qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
kfree(outb);
}
qp_attr->qp_state = qp->state;
qp_attr->cur_qp_state = qp_attr->qp_state;
qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
qp_attr->cap.max_recv_sge = qp->rq.max_gs;
if (!ibqp->uobject) {
qp_attr->cap.max_send_wr = qp->sq.max_post;
qp_attr->cap.max_send_sge = qp->sq.max_gs;
qp_init_attr->qp_context = ibqp->qp_context;
} else {
qp_attr->cap.max_send_wr = 0;
qp_attr->cap.max_send_sge = 0;
}
qp_init_attr->qp_type = ibqp->qp_type;
qp_init_attr->recv_cq = ibqp->recv_cq;
qp_init_attr->send_cq = ibqp->send_cq;
qp_init_attr->srq = ibqp->srq;
qp_attr->cap.max_inline_data = qp->max_inline_data;
qp_init_attr->cap = qp_attr->cap;
qp_init_attr->create_flags = 0;
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
out:
mutex_unlock(&qp->mutex);
return err;
}
struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_xrcd *xrcd;
int err;
if (!MLX5_CAP_GEN(dev->mdev, xrc))
return ERR_PTR(-ENOSYS);
xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
if (!xrcd)
return ERR_PTR(-ENOMEM);
err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn);
if (err) {
kfree(xrcd);
return ERR_PTR(-ENOMEM);
}
return &xrcd->ibxrcd;
}
int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
int err;
err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn);
if (err) {
mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
return err;
}
kfree(xrcd);
return 0;
}
diff --git a/sys/ofed/drivers/infiniband/core/cma.c b/sys/ofed/drivers/infiniband/core/cma.c
index 1d172016fc64..fed28cc85f9e 100644
--- a/sys/ofed/drivers/infiniband/core/cma.c
+++ b/sys/ofed/drivers/infiniband/core/cma.c
@@ -1,3886 +1,3853 @@
/*
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
* Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
* Copyright (c) 2016 Chelsio Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define LINUXKPI_PARAM_PREFIX ibcore_
#include <linux/completion.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/string.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18
static int cma_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
module_param_named(cma_response_timeout, cma_response_timeout, int, 0644);
MODULE_PARM_DESC(cma_response_timeout, "CMA_CM_RESPONSE_TIMEOUT (default=20)");
static int def_prec2sl = 3;
module_param_named(def_prec2sl, def_prec2sl, int, 0644);
MODULE_PARM_DESC(def_prec2sl, "Default value for SL priority with RoCE. Valid values 0 - 7");
-static int unify_tcp_port_space = 1;
-module_param(unify_tcp_port_space, int, 0644);
-MODULE_PARM_DESC(unify_tcp_port_space, "Unify the host TCP and RDMA port "
- "space allocation (default=1)");
-
static int debug_level = 0;
#define cma_pr(level, priv, format, arg...) \
printk(level "CMA: %p: %s: " format, ((struct rdma_id_priv *) priv) , __func__, ## arg)
#define cma_dbg(priv, format, arg...) \
do { if (debug_level) cma_pr(KERN_DEBUG, priv, format, ## arg); } while (0)
#define cma_warn(priv, format, arg...) \
cma_pr(KERN_WARNING, priv, format, ## arg)
#define CMA_GID_FMT "%2.2x%2.2x:%2.2x%2.2x"
#define CMA_GID_RAW_ARG(gid) ((u8 *)(gid))[12],\
((u8 *)(gid))[13],\
((u8 *)(gid))[14],\
((u8 *)(gid))[15]
#define CMA_GID_ARG(gid) CMA_GID_RAW_ARG((gid).raw)
#define cma_debug_path(priv, pfx, p) \
cma_dbg(priv, pfx "sgid=" CMA_GID_FMT ",dgid=" \
CMA_GID_FMT "\n", CMA_GID_ARG(p.sgid), \
CMA_GID_ARG(p.dgid))
#define cma_debug_gid(priv, g) \
cma_dbg(priv, "gid=" CMA_GID_FMT "\n", CMA_GID_ARG(g)
module_param_named(debug_level, debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "debug level default=0");
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device);
static struct ib_client cma_client = {
.name = "cma",
.add = cma_add_one,
.remove = cma_remove_one
};
static struct ib_sa_client sa_client;
static struct rdma_addr_client addr_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
static struct workqueue_struct *cma_free_wq;
static DEFINE_IDR(sdp_ps);
static DEFINE_IDR(tcp_ps);
static DEFINE_IDR(udp_ps);
static DEFINE_IDR(ipoib_ps);
static DEFINE_IDR(ib_ps);
struct cma_device {
struct list_head list;
struct ib_device *device;
struct completion comp;
atomic_t refcount;
struct list_head id_list;
};
struct rdma_bind_list {
struct idr *ps;
struct hlist_head owners;
unsigned short port;
};
enum {
CMA_OPTION_AFONLY,
};
/*
* Device removal can occur at anytime, so we need extra handling to
* serialize notifying the user of device removal with other callbacks.
* We do this by disabling removal notification while a callback is in process,
* and reporting it after the callback completes.
*/
struct rdma_id_private {
struct rdma_cm_id id;
struct rdma_bind_list *bind_list;
struct socket *sock;
struct hlist_node node;
struct list_head list; /* listen_any_list or cma_device.list */
struct list_head listen_list; /* per device listens */
struct cma_device *cma_dev;
struct list_head mc_list;
int internal_id;
enum rdma_cm_state state;
spinlock_t lock;
spinlock_t cm_lock;
struct mutex qp_mutex;
struct completion comp;
atomic_t refcount;
struct mutex handler_mutex;
struct work_struct work; /* garbage coll */
int backlog;
int timeout_ms;
struct ib_sa_query *query;
int query_id;
union {
struct ib_cm_id *ib;
struct iw_cm_id *iw;
} cm_id;
u32 seq_num;
u32 qkey;
u32 qp_num;
pid_t owner;
u32 options;
u8 srq;
u8 tos;
u8 reuseaddr;
u8 afonly;
int qp_timeout;
/* cache for mc record params */
struct ib_sa_mcmember_rec rec;
int is_valid_rec;
+ int unify_ps_tcp;
};
struct cma_multicast {
struct rdma_id_private *id_priv;
union {
struct ib_sa_multicast *ib;
} multicast;
struct list_head list;
void *context;
struct sockaddr_storage addr;
struct kref mcref;
};
struct cma_work {
struct work_struct work;
struct rdma_id_private *id;
enum rdma_cm_state old_state;
enum rdma_cm_state new_state;
struct rdma_cm_event event;
};
struct cma_ndev_work {
struct work_struct work;
struct rdma_id_private *id;
struct rdma_cm_event event;
};
struct iboe_mcast_work {
struct work_struct work;
struct rdma_id_private *id;
struct cma_multicast *mc;
};
union cma_ip_addr {
struct in6_addr ip6;
struct {
__be32 pad[3];
__be32 addr;
} ip4;
};
struct cma_hdr {
u8 cma_version;
u8 ip_version; /* IP version: 7:4 */
__be16 port;
union cma_ip_addr src_addr;
union cma_ip_addr dst_addr;
};
struct sdp_hh {
u8 bsdh[16];
u8 sdp_version; /* Major version: 7:4 */
u8 ip_version; /* IP version: 7:4 */
u8 sdp_specific1[10];
__be16 port;
__be16 sdp_specific2;
union cma_ip_addr src_addr;
union cma_ip_addr dst_addr;
};
struct sdp_hah {
u8 bsdh[16];
u8 sdp_version;
};
#define CMA_VERSION 0x00
#define SDP_MAJ_VERSION 0x2
static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&id_priv->lock, flags);
ret = (id_priv->state == comp);
spin_unlock_irqrestore(&id_priv->lock, flags);
return ret;
}
static int cma_comp_exch(struct rdma_id_private *id_priv,
enum rdma_cm_state comp, enum rdma_cm_state exch)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&id_priv->lock, flags);
if ((ret = (id_priv->state == comp)))
id_priv->state = exch;
spin_unlock_irqrestore(&id_priv->lock, flags);
return ret;
}
static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
enum rdma_cm_state exch)
{
unsigned long flags;
enum rdma_cm_state old;
spin_lock_irqsave(&id_priv->lock, flags);
old = id_priv->state;
id_priv->state = exch;
spin_unlock_irqrestore(&id_priv->lock, flags);
return old;
}
static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
{
return hdr->ip_version >> 4;
}
static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
static inline u8 sdp_get_majv(u8 sdp_version)
{
return sdp_version >> 4;
}
static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
{
return hh->ip_version >> 4;
}
static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
{
hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
atomic_inc(&cma_dev->refcount);
id_priv->cma_dev = cma_dev;
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
}
static inline void cma_deref_dev(struct cma_device *cma_dev)
{
if (atomic_dec_and_test(&cma_dev->refcount))
complete(&cma_dev->comp);
}
static inline void release_mc(struct kref *kref)
{
struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
kfree(mc->multicast.ib);
kfree(mc);
}
static void cma_release_dev(struct rdma_id_private *id_priv)
{
mutex_lock(&lock);
list_del(&id_priv->list);
cma_deref_dev(id_priv->cma_dev);
id_priv->cma_dev = NULL;
mutex_unlock(&lock);
}
static int cma_set_qkey(struct rdma_id_private *id_priv)
{
struct ib_sa_mcmember_rec rec;
int ret = 0;
if (id_priv->qkey)
return 0;
switch (id_priv->id.ps) {
case RDMA_PS_UDP:
id_priv->qkey = RDMA_UDP_QKEY;
break;
case RDMA_PS_IPOIB:
ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
ret = ib_sa_get_mcmember_rec(id_priv->id.device,
id_priv->id.port_num, &rec.mgid,
&rec);
if (!ret)
id_priv->qkey = be32_to_cpu(rec.qkey);
break;
default:
break;
}
return ret;
}
static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
{
int i;
int err;
struct ib_port_attr props;
union ib_gid tmp;
err = ib_query_port(device, port_num, &props);
if (err)
return 1;
for (i = 0; i < props.gid_tbl_len; ++i) {
err = ib_query_gid(device, port_num, i, &tmp);
if (err)
return 1;
if (!memcmp(&tmp, gid, sizeof tmp))
return 0;
}
return -EAGAIN;
}
int
rdma_find_cmid_laddr(struct sockaddr_in *local_addr, unsigned short dev_type,
void **cm_id)
{
int ret;
u8 port;
int found_dev = 0, found_cmid = 0;
struct rdma_id_private *id_priv;
struct rdma_id_private *dev_id_priv;
struct cma_device *cma_dev;
struct rdma_dev_addr dev_addr;
union ib_gid gid;
enum rdma_link_layer dev_ll = dev_type == ARPHRD_INFINIBAND ?
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
memset(&dev_addr, 0, sizeof(dev_addr));
ret = rdma_translate_ip((struct sockaddr *)local_addr,
&dev_addr, NULL);
if (ret)
goto err;
/* find rdma device based on MAC address/gid */
mutex_lock(&lock);
memcpy(&gid, dev_addr.src_dev_addr +
rdma_addr_gid_offset(&dev_addr), sizeof(gid));
list_for_each_entry(cma_dev, &dev_list, list)
for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port)
if ((rdma_port_get_link_layer(cma_dev->device, port) ==
dev_ll) &&
(rdma_node_get_transport(cma_dev->device->node_type) ==
RDMA_TRANSPORT_IWARP)) {
ret = find_gid_port(cma_dev->device,
&gid, port);
if (!ret) {
found_dev = 1;
goto out;
} else if (ret == 1) {
mutex_unlock(&lock);
goto err;
}
}
out:
mutex_unlock(&lock);
if (!found_dev)
goto err;
/* Traverse through the list of listening cm_id's to find the
* desired cm_id based on rdma device & port number.
*/
list_for_each_entry(id_priv, &listen_any_list, list)
list_for_each_entry(dev_id_priv, &id_priv->listen_list,
listen_list)
if (dev_id_priv->cma_dev == cma_dev)
if (dev_id_priv->cm_id.iw->local_addr.sin_port
== local_addr->sin_port) {
*cm_id = (void *)dev_id_priv->cm_id.iw;
found_cmid = 1;
}
return found_cmid ? 0 : -ENODEV;
err:
return -ENODEV;
}
EXPORT_SYMBOL(rdma_find_cmid_laddr);
static int cma_acquire_dev(struct rdma_id_private *id_priv,
struct rdma_id_private *listen_id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct cma_device *cma_dev;
union ib_gid gid, iboe_gid;
int ret = -ENODEV;
u8 port, found_port;
enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
if (dev_ll != IB_LINK_LAYER_INFINIBAND &&
id_priv->id.ps == RDMA_PS_IPOIB)
return -EINVAL;
mutex_lock(&lock);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&iboe_gid);
memcpy(&gid, dev_addr->src_dev_addr +
rdma_addr_gid_offset(dev_addr), sizeof gid);
if (listen_id_priv &&
rdma_port_get_link_layer(listen_id_priv->id.device,
listen_id_priv->id.port_num) == dev_ll) {
cma_dev = listen_id_priv->cma_dev;
port = listen_id_priv->id.port_num;
if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
&found_port, NULL);
else
ret = ib_find_cached_gid(cma_dev->device, &gid,
&found_port, NULL);
if (!ret && (port == found_port)) {
id_priv->id.port_num = found_port;
goto out;
}
}
list_for_each_entry(cma_dev, &dev_list, list) {
for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
if (listen_id_priv &&
listen_id_priv->cma_dev == cma_dev &&
listen_id_priv->id.port_num == port)
continue;
if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);
else
ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL);
if (!ret && (port == found_port)) {
id_priv->id.port_num = port;
goto out;
} else if (ret == 1)
break;
}
}
}
out:
if (!ret)
cma_attach_to_dev(id_priv, cma_dev);
mutex_unlock(&lock);
return ret;
}
static void cma_deref_id(struct rdma_id_private *id_priv)
{
if (atomic_dec_and_test(&id_priv->refcount))
complete(&id_priv->comp);
}
static int cma_disable_callback(struct rdma_id_private *id_priv,
enum rdma_cm_state state)
{
mutex_lock(&id_priv->handler_mutex);
if (id_priv->state != state) {
mutex_unlock(&id_priv->handler_mutex);
return -EINVAL;
}
return 0;
}
struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
void *context, enum rdma_port_space ps,
enum ib_qp_type qp_type)
{
struct rdma_id_private *id_priv;
id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
if (!id_priv)
return ERR_PTR(-ENOMEM);
id_priv->owner = curthread->td_proc->p_pid;
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
id_priv->id.qp_type = qp_type;
spin_lock_init(&id_priv->lock);
spin_lock_init(&id_priv->cm_lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
atomic_set(&id_priv->refcount, 1);
mutex_init(&id_priv->handler_mutex);
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
return &id_priv->id;
}
EXPORT_SYMBOL(rdma_create_id);
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
qp_attr.qp_state = IB_QPS_INIT;
ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
return ret;
ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
if (ret)
return ret;
qp_attr.qp_state = IB_QPS_RTR;
ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
if (ret)
return ret;
qp_attr.qp_state = IB_QPS_RTS;
qp_attr.sq_psn = 0;
ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
return ret;
}
static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
qp_attr.qp_state = IB_QPS_INIT;
ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
return ret;
return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}
int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr)
{
struct rdma_id_private *id_priv;
struct ib_qp *qp;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
if (id->device != pd->device)
return -EINVAL;
qp = ib_create_qp(pd, qp_init_attr);
if (IS_ERR(qp))
return PTR_ERR(qp);
if (id->qp_type == IB_QPT_UD)
ret = cma_init_ud_qp(id_priv, qp);
else
ret = cma_init_conn_qp(id_priv, qp);
if (ret)
goto err;
id->qp = qp;
id_priv->qp_num = qp->qp_num;
id_priv->srq = (qp->srq != NULL);
return 0;
err:
ib_destroy_qp(qp);
return ret;
}
EXPORT_SYMBOL(rdma_create_qp);
void rdma_destroy_qp(struct rdma_cm_id *id)
{
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
mutex_lock(&id_priv->qp_mutex);
ib_destroy_qp(id_priv->id.qp);
id_priv->id.qp = NULL;
mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_destroy_qp);
static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
union ib_gid sgid;
mutex_lock(&id_priv->qp_mutex);
if (!id_priv->id.qp) {
ret = 0;
goto out;
}
/* Need to update QP attributes from default values. */
qp_attr.qp_state = IB_QPS_INIT;
ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
goto out;
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
if (ret)
goto out;
qp_attr.qp_state = IB_QPS_RTR;
ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
goto out;
ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
qp_attr.ah_attr.grh.sgid_index, &sgid);
if (ret)
goto out;
if (rdma_node_get_transport(id_priv->cma_dev->device->node_type)
== RDMA_TRANSPORT_IB &&
rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)
== IB_LINK_LAYER_ETHERNET) {
u32 scope_id = rdma_get_ipv6_scope_id(id_priv->id.device,
id_priv->id.port_num);
ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL,
scope_id);
if (ret)
goto out;
}
if (conn_param)
qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
mutex_unlock(&id_priv->qp_mutex);
return ret;
}
static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
mutex_lock(&id_priv->qp_mutex);
if (!id_priv->id.qp) {
ret = 0;
goto out;
}
qp_attr.qp_state = IB_QPS_RTS;
ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
goto out;
if (conn_param)
qp_attr.max_rd_atomic = conn_param->initiator_depth;
if (id_priv->qp_timeout && id_priv->id.qp->qp_type == IB_QPT_RC) {
qp_attr.timeout = id_priv->qp_timeout;
qp_attr_mask |= IB_QP_TIMEOUT;
}
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
mutex_unlock(&id_priv->qp_mutex);
return ret;
}
static int cma_modify_qp_err(struct rdma_id_private *id_priv)
{
struct ib_qp_attr qp_attr;
int ret;
mutex_lock(&id_priv->qp_mutex);
if (!id_priv->id.qp) {
ret = 0;
goto out;
}
qp_attr.qp_state = IB_QPS_ERR;
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
out:
mutex_unlock(&id_priv->qp_mutex);
return ret;
}
static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int ret;
u16 pkey;
if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) ==
IB_LINK_LAYER_INFINIBAND)
pkey = ib_addr_get_pkey(dev_addr);
else
pkey = 0xffff;
ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
pkey, &qp_attr->pkey_index);
if (ret)
return ret;
qp_attr->port_num = id_priv->id.port_num;
*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
if (id_priv->id.qp_type == IB_QPT_UD) {
ret = cma_set_qkey(id_priv);
if (ret)
return ret;
qp_attr->qkey = id_priv->qkey;
*qp_attr_mask |= IB_QP_QKEY;
} else {
qp_attr->qp_access_flags = 0;
*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
}
return 0;
}
int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
int *qp_attr_mask)
{
struct rdma_id_private *id_priv;
int ret = 0;
id_priv = container_of(id, struct rdma_id_private, id);
- switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
- case RDMA_TRANSPORT_IB:
+ if (rdma_cap_ib_cm(id->device, id->port_num)) {
if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
else
ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
qp_attr_mask);
+
if (qp_attr->qp_state == IB_QPS_RTR)
qp_attr->rq_psn = id_priv->seq_num;
- break;
- case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
+ } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
if (!id_priv->cm_id.iw) {
qp_attr->qp_access_flags = 0;
*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
} else
ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
qp_attr_mask);
- break;
- default:
+ } else
ret = -ENOSYS;
- break;
- }
return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);
static inline int cma_zero_addr(struct sockaddr *addr)
{
struct in6_addr *ip6;
if (addr->sa_family == AF_INET)
return ipv4_is_zeronet(
((struct sockaddr_in *)addr)->sin_addr.s_addr);
else {
ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
}
}
static inline int cma_loopback_addr(struct sockaddr *addr)
{
if (addr->sa_family == AF_INET)
return ipv4_is_loopback(
((struct sockaddr_in *) addr)->sin_addr.s_addr);
else
return ipv6_addr_loopback(
&((struct sockaddr_in6 *) addr)->sin6_addr);
}
static inline int cma_any_addr(struct sockaddr *addr)
{
return cma_zero_addr(addr) || cma_loopback_addr(addr);
}
int
rdma_cma_any_addr(struct sockaddr *addr)
{
return cma_any_addr(addr);
}
EXPORT_SYMBOL(rdma_cma_any_addr);
static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
{
if (src->sa_family != dst->sa_family)
return -1;
switch (src->sa_family) {
case AF_INET:
return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
((struct sockaddr_in *) dst)->sin_addr.s_addr;
default:
return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
&((struct sockaddr_in6 *) dst)->sin6_addr);
}
}
static inline __be16 cma_port(struct sockaddr *addr)
{
if (addr->sa_family == AF_INET)
return ((struct sockaddr_in *) addr)->sin_port;
else
return ((struct sockaddr_in6 *) addr)->sin6_port;
}
static inline int cma_any_port(struct sockaddr *addr)
{
return !cma_port(addr);
}
static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
u8 *ip_ver, __be16 *port,
union cma_ip_addr **src, union cma_ip_addr **dst)
{
switch (ps) {
case RDMA_PS_SDP:
if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
SDP_MAJ_VERSION)
return -EINVAL;
*ip_ver = sdp_get_ip_ver(hdr);
*port = ((struct sdp_hh *) hdr)->port;
*src = &((struct sdp_hh *) hdr)->src_addr;
*dst = &((struct sdp_hh *) hdr)->dst_addr;
break;
default:
if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
return -EINVAL;
*ip_ver = cma_get_ip_ver(hdr);
*port = ((struct cma_hdr *) hdr)->port;
*src = &((struct cma_hdr *) hdr)->src_addr;
*dst = &((struct cma_hdr *) hdr)->dst_addr;
break;
}
if (*ip_ver != 4 && *ip_ver != 6)
return -EINVAL;
return 0;
}
static void cma_save_net_info(struct rdma_addr *addr,
struct rdma_addr *listen_addr,
u8 ip_ver, __be16 port,
union cma_ip_addr *src, union cma_ip_addr *dst)
{
struct sockaddr_in *listen4, *ip4;
struct sockaddr_in6 *listen6, *ip6;
switch (ip_ver) {
case 4:
listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
ip4 = (struct sockaddr_in *) &addr->src_addr;
ip4->sin_family = listen4->sin_family;
ip4->sin_addr.s_addr = dst->ip4.addr;
ip4->sin_port = listen4->sin_port;
ip4->sin_len = sizeof(struct sockaddr_in);
ip4 = (struct sockaddr_in *) &addr->dst_addr;
ip4->sin_family = listen4->sin_family;
ip4->sin_addr.s_addr = src->ip4.addr;
ip4->sin_port = port;
ip4->sin_len = sizeof(struct sockaddr_in);
break;
case 6:
listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
ip6 = (struct sockaddr_in6 *) &addr->src_addr;
ip6->sin6_family = listen6->sin6_family;
ip6->sin6_addr = dst->ip6;
ip6->sin6_port = listen6->sin6_port;
ip6->sin6_len = sizeof(struct sockaddr_in6);
ip6->sin6_scope_id = listen6->sin6_scope_id;
ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
ip6->sin6_family = listen6->sin6_family;
ip6->sin6_addr = src->ip6;
ip6->sin6_port = port;
ip6->sin6_len = sizeof(struct sockaddr_in6);
ip6->sin6_scope_id = listen6->sin6_scope_id;
break;
default:
break;
}
}
static inline int cma_user_data_offset(enum rdma_port_space ps)
{
switch (ps) {
case RDMA_PS_SDP:
return 0;
default:
return sizeof(struct cma_hdr);
}
}
static void cma_cancel_route(struct rdma_id_private *id_priv)
{
switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) {
case IB_LINK_LAYER_INFINIBAND:
if (id_priv->query)
ib_sa_cancel_query(id_priv->query_id, id_priv->query);
break;
default:
break;
}
}
static void cma_cancel_listens(struct rdma_id_private *id_priv)
{
struct rdma_id_private *dev_id_priv;
/*
* Remove from listen_any_list to prevent added devices from spawning
* additional listen requests.
*/
mutex_lock(&lock);
list_del(&id_priv->list);
while (!list_empty(&id_priv->listen_list)) {
dev_id_priv = list_entry(id_priv->listen_list.next,
struct rdma_id_private, listen_list);
/* sync with device removal to avoid duplicate destruction */
list_del_init(&dev_id_priv->list);
list_del(&dev_id_priv->listen_list);
mutex_unlock(&lock);
rdma_destroy_id(&dev_id_priv->id);
mutex_lock(&lock);
}
mutex_unlock(&lock);
}
static void cma_cancel_operation(struct rdma_id_private *id_priv,
enum rdma_cm_state state)
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
cma_cancel_route(id_priv);
break;
case RDMA_CM_LISTEN:
if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
&& !id_priv->cma_dev)
cma_cancel_listens(id_priv);
break;
default:
break;
}
}
static void cma_release_port(struct rdma_id_private *id_priv)
{
struct rdma_bind_list *bind_list;
mutex_lock(&lock);
bind_list = id_priv->bind_list;
if (!bind_list) {
mutex_unlock(&lock);
return;
}
hlist_del(&id_priv->node);
id_priv->bind_list = NULL;
if (hlist_empty(&bind_list->owners)) {
idr_remove(bind_list->ps, bind_list->port);
kfree(bind_list);
}
mutex_unlock(&lock);
}
static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
{
struct cma_multicast *mc;
while (!list_empty(&id_priv->mc_list)) {
mc = container_of(id_priv->mc_list.next,
struct cma_multicast, list);
list_del(&mc->list);
switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) {
case IB_LINK_LAYER_INFINIBAND:
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
break;
case IB_LINK_LAYER_ETHERNET:
kref_put(&mc->mcref, release_mc);
break;
default:
break;
}
}
}
static void __rdma_free(struct work_struct *work)
{
struct rdma_id_private *id_priv;
id_priv = container_of(work, struct rdma_id_private, work);
wait_for_completion(&id_priv->comp);
if (id_priv->internal_id)
cma_deref_id(id_priv->id.context);
+ if (id_priv->sock != NULL && !id_priv->internal_id &&
+ !id_priv->unify_ps_tcp)
+ sock_release(id_priv->sock);
+
kfree(id_priv->id.route.path_rec);
kfree(id_priv);
}
void rdma_destroy_id(struct rdma_cm_id *id)
{
struct rdma_id_private *id_priv;
enum rdma_cm_state state;
unsigned long flags;
struct ib_cm_id *ib;
id_priv = container_of(id, struct rdma_id_private, id);
state = cma_exch(id_priv, RDMA_CM_DESTROYING);
cma_cancel_operation(id_priv, state);
/*
* Wait for any active callback to finish. New callbacks will find
* the id_priv state set to destroying and abort.
*/
mutex_lock(&id_priv->handler_mutex);
mutex_unlock(&id_priv->handler_mutex);
if (id_priv->cma_dev) {
- switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
- case RDMA_TRANSPORT_IB:
+ if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
spin_lock_irqsave(&id_priv->cm_lock, flags);
if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) {
ib = id_priv->cm_id.ib;
id_priv->cm_id.ib = NULL;
spin_unlock_irqrestore(&id_priv->cm_lock, flags);
ib_destroy_cm_id(ib);
} else
spin_unlock_irqrestore(&id_priv->cm_lock, flags);
- break;
- case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
+ } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.iw)
iw_destroy_cm_id(id_priv->cm_id.iw);
- break;
- default:
- break;
}
cma_leave_mc_groups(id_priv);
cma_release_dev(id_priv);
}
cma_release_port(id_priv);
cma_deref_id(id_priv);
INIT_WORK(&id_priv->work, __rdma_free);
queue_work(cma_free_wq, &id_priv->work);
}
EXPORT_SYMBOL(rdma_destroy_id);
static int cma_rep_recv(struct rdma_id_private *id_priv)
{
int ret;
ret = cma_modify_qp_rtr(id_priv, NULL);
if (ret)
goto reject;
ret = cma_modify_qp_rts(id_priv, NULL);
if (ret)
goto reject;
cma_dbg(id_priv, "sending RTU\n");
ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
if (ret)
goto reject;
return 0;
reject:
cma_modify_qp_err(id_priv);
cma_dbg(id_priv, "sending REJ\n");
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
return ret;
}
static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
{
if (id_priv->id.ps == RDMA_PS_SDP &&
sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
SDP_MAJ_VERSION)
return -EINVAL;
return 0;
}
static void cma_set_rep_event_data(struct rdma_cm_event *event,
struct ib_cm_rep_event_param *rep_data,
void *private_data)
{
event->param.conn.private_data = private_data;
event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
event->param.conn.responder_resources = rep_data->responder_resources;
event->param.conn.initiator_depth = rep_data->initiator_depth;
event->param.conn.flow_control = rep_data->flow_control;
event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
event->param.conn.srq = rep_data->srq;
event->param.conn.qp_num = rep_data->remote_qpn;
}
static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
struct rdma_id_private *id_priv = cm_id->context;
struct rdma_cm_event event;
int ret = 0;
if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
cma_disable_callback(id_priv, RDMA_CM_CONNECT)) ||
(ib_event->event == IB_CM_TIMEWAIT_EXIT &&
cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))
return 0;
memset(&event, 0, sizeof event);
switch (ib_event->event) {
case IB_CM_REQ_ERROR:
case IB_CM_REP_ERROR:
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
event.status = cma_verify_rep(id_priv, ib_event->private_data);
if (event.status)
event.event = RDMA_CM_EVENT_CONNECT_ERROR;
else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
event.status = cma_rep_recv(id_priv);
event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
RDMA_CM_EVENT_ESTABLISHED;
} else
event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
ib_event->private_data);
break;
case IB_CM_RTU_RECEIVED:
case IB_CM_USER_ESTABLISHED:
event.event = RDMA_CM_EVENT_ESTABLISHED;
break;
case IB_CM_DREQ_ERROR:
event.status = -ETIMEDOUT; /* fall through */
case IB_CM_DREQ_RECEIVED:
case IB_CM_DREP_RECEIVED:
if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
RDMA_CM_DISCONNECT))
goto out;
event.event = RDMA_CM_EVENT_DISCONNECTED;
break;
case IB_CM_TIMEWAIT_EXIT:
event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
break;
case IB_CM_MRA_RECEIVED:
/* ignore event */
goto out;
case IB_CM_REJ_RECEIVED:
cma_modify_qp_err(id_priv);
event.status = ib_event->param.rej_rcvd.reason;
event.event = RDMA_CM_EVENT_REJECTED;
event.param.conn.private_data = ib_event->private_data;
event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
break;
default:
printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
ib_event->event);
goto out;
}
ret = id_priv->id.event_handler(&id_priv->id, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
return ret;
}
out:
mutex_unlock(&id_priv->handler_mutex);
return ret;
}
static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event)
{
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
struct rdma_route *rt;
union cma_ip_addr *src, *dst;
__be16 port;
u8 ip_ver;
int ret;
if (cma_get_net_info(ib_event->private_data, listen_id->ps,
&ip_ver, &port, &src, &dst))
return NULL;
id = rdma_create_id(listen_id->event_handler, listen_id->context,
listen_id->ps, ib_event->param.req_rcvd.qp_type);
if (IS_ERR(id))
return NULL;
cma_save_net_info(&id->route.addr, &listen_id->route.addr,
ip_ver, port, src, dst);
rt = &id->route;
rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
GFP_KERNEL);
if (!rt->path_rec)
goto err;
rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
} else {
ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
&rt->addr.dev_addr, NULL);
if (ret)
goto err;
}
rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = RDMA_CM_CONNECT;
return id_priv;
err:
rdma_destroy_id(id);
return NULL;
}
static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event)
{
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
union cma_ip_addr *src, *dst;
__be16 port;
u8 ip_ver;
int ret;
id = rdma_create_id(listen_id->event_handler, listen_id->context,
listen_id->ps, IB_QPT_UD);
if (IS_ERR(id))
return NULL;
if (cma_get_net_info(ib_event->private_data, listen_id->ps,
&ip_ver, &port, &src, &dst))
goto err;
cma_save_net_info(&id->route.addr, &listen_id->route.addr,
ip_ver, port, src, dst);
if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
&id->route.addr.dev_addr, NULL);
if (ret)
goto err;
}
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = RDMA_CM_CONNECT;
return id_priv;
err:
rdma_destroy_id(id);
return NULL;
}
static void cma_set_req_event_data(struct rdma_cm_event *event,
struct ib_cm_req_event_param *req_data,
void *private_data, int offset)
{
event->param.conn.private_data = private_data + offset;
event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
event->param.conn.responder_resources = req_data->responder_resources;
event->param.conn.initiator_depth = req_data->initiator_depth;
event->param.conn.flow_control = req_data->flow_control;
event->param.conn.retry_count = req_data->retry_count;
event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
event->param.conn.srq = req_data->srq;
event->param.conn.qp_num = req_data->remote_qpn;
}
static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
{
return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
(ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
(id->qp_type == IB_QPT_UD)) ||
(!id->qp_type));
}
static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event;
int offset, ret;
u8 smac[ETH_ALEN];
u8 alt_smac[ETH_ALEN];
u8 *psmac = smac;
u8 *palt_smac = alt_smac;
int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) ==
RDMA_TRANSPORT_IB) &&
(rdma_port_get_link_layer(cm_id->device,
ib_event->param.req_rcvd.port) ==
IB_LINK_LAYER_ETHERNET));
int is_sidr = 0;
listen_id = cm_id->context;
if (!cma_check_req_qp_type(&listen_id->id, ib_event))
return -EINVAL;
if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
return -ECONNABORTED;
memset(&event, 0, sizeof event);
offset = cma_user_data_offset(listen_id->id.ps);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
is_sidr = 1;
conn_id = cma_new_udp_id(&listen_id->id, ib_event);
event.param.ud.private_data = ib_event->private_data + offset;
event.param.ud.private_data_len =
IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
} else {
conn_id = cma_new_conn_id(&listen_id->id, ib_event);
cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
ib_event->private_data, offset);
}
if (!conn_id) {
ret = -ENOMEM;
goto err1;
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
ret = cma_acquire_dev(conn_id, listen_id);
if (ret)
goto err2;
conn_id->cm_id.ib = cm_id;
cm_id->context = conn_id;
cm_id->cm_handler = cma_ib_handler;
/*
* Protect against the user destroying conn_id from another thread
* until we're done accessing it.
*/
atomic_inc(&conn_id->refcount);
ret = conn_id->id.event_handler(&conn_id->id, &event);
if (ret)
goto err3;
if (is_iboe && !is_sidr) {
u32 scope_id = rdma_get_ipv6_scope_id(cm_id->device,
ib_event->param.req_rcvd.port);
if (ib_event->param.req_rcvd.primary_path != NULL)
rdma_addr_find_smac_by_sgid(
&ib_event->param.req_rcvd.primary_path->sgid,
psmac, NULL, scope_id);
else
psmac = NULL;
if (ib_event->param.req_rcvd.alternate_path != NULL)
rdma_addr_find_smac_by_sgid(
&ib_event->param.req_rcvd.alternate_path->sgid,
palt_smac, NULL, scope_id);
else
palt_smac = NULL;
}
/*
* Acquire mutex to prevent user executing rdma_destroy_id()
* while we're accessing the cm_id.
*/
mutex_lock(&lock);
if (is_iboe && !is_sidr)
ib_update_cm_av(cm_id, psmac, palt_smac);
if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) {
cma_dbg(container_of(&conn_id->id, struct rdma_id_private, id), "sending MRA\n");
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
}
mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
mutex_unlock(&listen_id->handler_mutex);
cma_deref_id(conn_id);
return 0;
err3:
cma_deref_id(conn_id);
/* Destroy the CM ID by returning a non-zero value. */
conn_id->cm_id.ib = NULL;
err2:
cma_exch(conn_id, RDMA_CM_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
err1:
mutex_unlock(&listen_id->handler_mutex);
if (conn_id)
rdma_destroy_id(&conn_id->id);
return ret;
}
static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
{
return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
}
static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
struct ib_cm_compare_data *compare)
{
struct cma_hdr *cma_data, *cma_mask;
struct sdp_hh *sdp_data, *sdp_mask;
__be32 ip4_addr;
struct in6_addr ip6_addr;
memset(compare, 0, sizeof *compare);
cma_data = (void *) compare->data;
cma_mask = (void *) compare->mask;
sdp_data = (void *) compare->data;
sdp_mask = (void *) compare->mask;
switch (addr->sa_family) {
case AF_INET:
ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
if (ps == RDMA_PS_SDP) {
sdp_set_ip_ver(sdp_data, 4);
sdp_set_ip_ver(sdp_mask, 0xF);
if (!cma_any_addr(addr)) {
sdp_data->dst_addr.ip4.addr = ip4_addr;
sdp_mask->dst_addr.ip4.addr = htonl(~0);
}
} else {
cma_set_ip_ver(cma_data, 4);
cma_set_ip_ver(cma_mask, 0xF);
if (!cma_any_addr(addr)) {
cma_data->dst_addr.ip4.addr = ip4_addr;
cma_mask->dst_addr.ip4.addr = htonl(~0);
}
}
break;
case AF_INET6:
ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
if (ps == RDMA_PS_SDP) {
sdp_set_ip_ver(sdp_data, 6);
sdp_set_ip_ver(sdp_mask, 0xF);
if (!cma_any_addr(addr)) {
sdp_data->dst_addr.ip6 = ip6_addr;
memset(&sdp_mask->dst_addr.ip6, 0xFF,
sizeof(sdp_mask->dst_addr.ip6));
}
} else {
cma_set_ip_ver(cma_data, 6);
cma_set_ip_ver(cma_mask, 0xF);
if (!cma_any_addr(addr)) {
cma_data->dst_addr.ip6 = ip6_addr;
memset(&cma_mask->dst_addr.ip6, 0xFF,
sizeof(cma_mask->dst_addr.ip6));
}
}
break;
default:
break;
}
}
static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
struct rdma_id_private *id_priv = iw_id->context;
struct rdma_cm_event event;
struct sockaddr_in *sin;
int ret = 0;
if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
return 0;
memset(&event, 0, sizeof event);
switch (iw_event->event) {
case IW_CM_EVENT_CLOSE:
event.event = RDMA_CM_EVENT_DISCONNECTED;
break;
case IW_CM_EVENT_CONNECT_REPLY:
sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
*sin = iw_event->local_addr;
sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
*sin = iw_event->remote_addr;
switch ((int)iw_event->status) {
case 0:
event.event = RDMA_CM_EVENT_ESTABLISHED;
event.param.conn.initiator_depth = iw_event->ird;
event.param.conn.responder_resources = iw_event->ord;
break;
case -ECONNRESET:
case -ECONNREFUSED:
event.event = RDMA_CM_EVENT_REJECTED;
break;
case -ETIMEDOUT:
event.event = RDMA_CM_EVENT_UNREACHABLE;
break;
default:
event.event = RDMA_CM_EVENT_CONNECT_ERROR;
break;
}
break;
case IW_CM_EVENT_ESTABLISHED:
event.event = RDMA_CM_EVENT_ESTABLISHED;
event.param.conn.initiator_depth = iw_event->ird;
event.param.conn.responder_resources = iw_event->ord;
break;
default:
BUG_ON(1);
}
event.status = iw_event->status;
event.param.conn.private_data = iw_event->private_data;
event.param.conn.private_data_len = iw_event->private_data_len;
ret = id_priv->id.event_handler(&id_priv->id, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.iw = NULL;
cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
return ret;
}
mutex_unlock(&id_priv->handler_mutex);
return ret;
}
static int iw_conn_req_handler(struct iw_cm_id *cm_id,
struct iw_cm_event *iw_event)
{
struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
struct sockaddr_in *sin;
struct net_device *dev = NULL;
struct rdma_cm_event event;
int ret;
struct ib_device_attr attr;
listen_id = cm_id->context;
if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
return -ECONNABORTED;
/* Create a new RDMA id for the new IW CM ID */
new_cm_id = rdma_create_id(listen_id->id.event_handler,
listen_id->id.context,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(new_cm_id)) {
ret = -ENOMEM;
goto out;
}
conn_id = container_of(new_cm_id, struct rdma_id_private, id);
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
if (!dev) {
ret = -EADDRNOTAVAIL;
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
goto out;
}
ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
goto out;
}
ret = cma_acquire_dev(conn_id, listen_id);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
goto out;
}
conn_id->cm_id.iw = cm_id;
cm_id->context = conn_id;
cm_id->cm_handler = cma_iw_handler;
sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
*sin = iw_event->local_addr;
sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
*sin = iw_event->remote_addr;
ret = ib_query_device(conn_id->id.device, &attr);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
goto out;
}
memset(&event, 0, sizeof event);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
event.param.conn.private_data = iw_event->private_data;
event.param.conn.private_data_len = iw_event->private_data_len;
event.param.conn.initiator_depth = iw_event->ird;
event.param.conn.responder_resources = iw_event->ord;
/*
* Protect against the user destroying conn_id from another thread
* until we're done accessing it.
*/
atomic_inc(&conn_id->refcount);
ret = conn_id->id.event_handler(&conn_id->id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
cma_exch(conn_id, RDMA_CM_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
cma_deref_id(conn_id);
rdma_destroy_id(&conn_id->id);
goto out;
}
mutex_unlock(&conn_id->handler_mutex);
cma_deref_id(conn_id);
out:
if (dev)
dev_put(dev);
mutex_unlock(&listen_id->handler_mutex);
return ret;
}
static int cma_ib_listen(struct rdma_id_private *id_priv)
{
struct ib_cm_compare_data compare_data;
struct sockaddr *addr;
struct ib_cm_id *id;
__be64 svc_id;
int ret;
id = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv);
if (IS_ERR(id))
return PTR_ERR(id);
id_priv->cm_id.ib = id;
addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
svc_id = cma_get_service_id(id_priv->id.ps, addr);
if (cma_any_addr(addr) && !id_priv->afonly)
ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
else {
cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
}
if (ret) {
ib_destroy_cm_id(id_priv->cm_id.ib);
id_priv->cm_id.ib = NULL;
}
return ret;
}
static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
{
int ret;
struct sockaddr_in *sin;
struct iw_cm_id *id;
id = iw_create_cm_id(id_priv->id.device,
id_priv->sock,
iw_conn_req_handler,
id_priv);
if (IS_ERR(id))
return PTR_ERR(id);
id_priv->cm_id.iw = id;
sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
id_priv->cm_id.iw->local_addr = *sin;
ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
if (ret) {
iw_destroy_cm_id(id_priv->cm_id.iw);
id_priv->cm_id.iw = NULL;
}
return ret;
}
static int cma_listen_handler(struct rdma_cm_id *id,
struct rdma_cm_event *event)
{
struct rdma_id_private *id_priv = id->context;
id->context = id_priv->id.context;
id->event_handler = id_priv->id.event_handler;
return id_priv->id.event_handler(id, event);
}
static void cma_listen_on_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
struct rdma_id_private *dev_id_priv;
struct rdma_cm_id *id;
int ret;
id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
id_priv->id.qp_type);
if (IS_ERR(id))
return;
dev_id_priv = container_of(id, struct rdma_id_private, id);
dev_id_priv->state = RDMA_CM_ADDR_BOUND;
dev_id_priv->sock = id_priv->sock;
memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
cma_attach_to_dev(dev_id_priv, cma_dev);
list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
atomic_inc(&id_priv->refcount);
dev_id_priv->internal_id = 1;
dev_id_priv->afonly = id_priv->afonly;
ret = rdma_listen(id, id_priv->backlog);
if (ret)
cma_warn(id_priv, "cma_listen_on_dev, error %d, listening on device %s\n", ret, cma_dev->device->name);
}
static void cma_listen_on_all(struct rdma_id_private *id_priv)
{
struct cma_device *cma_dev;
mutex_lock(&lock);
list_add_tail(&id_priv->list, &listen_any_list);
list_for_each_entry(cma_dev, &dev_list, list)
cma_listen_on_dev(id_priv, cma_dev);
mutex_unlock(&lock);
}
void rdma_set_service_type(struct rdma_cm_id *id, int tos)
{
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->tos = (u8) tos;
}
EXPORT_SYMBOL(rdma_set_service_type);
void rdma_set_timeout(struct rdma_cm_id *id, int timeout)
{
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->qp_timeout = (u8) timeout;
}
EXPORT_SYMBOL(rdma_set_timeout);
static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
void *context)
{
struct cma_work *work = context;
struct rdma_route *route;
route = &work->id->id.route;
if (!status) {
route->num_paths = 1;
*route->path_rec = *path_rec;
} else {
work->old_state = RDMA_CM_ROUTE_QUERY;
work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
work->event.status = status;
}
queue_work(cma_wq, &work->work);
}
static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct cma_work *work)
{
struct rdma_addr *addr = &id_priv->id.route.addr;
struct ib_sa_path_rec path_rec;
ib_sa_comp_mask comp_mask;
struct sockaddr_in6 *sin6;
memset(&path_rec, 0, sizeof path_rec);
rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
path_rec.service_id = cma_get_service_id(id_priv->id.ps,
(struct sockaddr *) &addr->dst_addr);
comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
if (addr->src_addr.ss_family == AF_INET) {
path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
} else {
sin6 = (struct sockaddr_in6 *) &addr->src_addr;
path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
}
id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
id_priv->id.port_num, &path_rec,
comp_mask, timeout_ms,
GFP_KERNEL, cma_query_handler,
work, &id_priv->query);
return (id_priv->query_id < 0) ? id_priv->query_id : 0;
}
static void cma_work_handler(struct work_struct *_work)
{
struct cma_work *work = container_of(_work, struct cma_work, work);
struct rdma_id_private *id_priv = work->id;
int destroy = 0;
mutex_lock(&id_priv->handler_mutex);
if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
goto out;
if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
out:
mutex_unlock(&id_priv->handler_mutex);
cma_deref_id(id_priv);
if (destroy)
rdma_destroy_id(&id_priv->id);
kfree(work);
}
static void cma_ndev_work_handler(struct work_struct *_work)
{
struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
struct rdma_id_private *id_priv = work->id;
int destroy = 0;
mutex_lock(&id_priv->handler_mutex);
if (id_priv->state == RDMA_CM_DESTROYING ||
id_priv->state == RDMA_CM_DEVICE_REMOVAL)
goto out;
if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
out:
mutex_unlock(&id_priv->handler_mutex);
cma_deref_id(id_priv);
if (destroy)
rdma_destroy_id(&id_priv->id);
kfree(work);
}
static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
{
struct rdma_route *route = &id_priv->id.route;
struct cma_work *work;
int ret;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
work->old_state = RDMA_CM_ROUTE_QUERY;
work->new_state = RDMA_CM_ROUTE_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
if (!route->path_rec) {
ret = -ENOMEM;
goto err1;
}
ret = cma_query_ib_route(id_priv, timeout_ms, work);
if (ret)
goto err2;
return 0;
err2:
kfree(route->path_rec);
route->path_rec = NULL;
err1:
kfree(work);
return ret;
}
int rdma_set_ib_paths(struct rdma_cm_id *id,
struct ib_sa_path_rec *path_rec, int num_paths)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
RDMA_CM_ROUTE_RESOLVED))
return -EINVAL;
id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
GFP_KERNEL);
if (!id->route.path_rec) {
ret = -ENOMEM;
goto err;
}
id->route.num_paths = num_paths;
return 0;
err:
cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
return ret;
}
EXPORT_SYMBOL(rdma_set_ib_paths);
static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
{
struct cma_work *work;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
work->old_state = RDMA_CM_ROUTE_QUERY;
work->new_state = RDMA_CM_ROUTE_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
queue_work(cma_wq, &work->work);
return 0;
}
static u8 tos_to_sl(u8 tos)
{
return def_prec2sl & 7;
}
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
struct rdma_addr *addr = &route->addr;
struct cma_work *work;
int ret;
struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
struct net_device *ndev = NULL;
if (src_addr->sin_family != dst_addr->sin_family)
return -EINVAL;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
if (!route->path_rec) {
ret = -ENOMEM;
goto err1;
}
route->num_paths = 1;
if (addr->dev_addr.bound_dev_if)
ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
if (!ndev) {
ret = -ENODEV;
goto err2;
}
route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
memcpy(route->path_rec->smac, IF_LLADDR(ndev), ndev->if_addrlen);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&route->path_rec->sgid);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
&route->path_rec->dgid);
route->path_rec->hop_limit = 1;
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
route->path_rec->sl = tos_to_sl(id_priv->tos);
route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu);
route->path_rec->rate_selector = IB_SA_EQ;
route->path_rec->rate = iboe_get_rate(ndev);
dev_put(ndev);
route->path_rec->packet_life_time_selector = IB_SA_EQ;
route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
if (!route->path_rec->mtu) {
ret = -EINVAL;
goto err2;
}
work->old_state = RDMA_CM_ROUTE_QUERY;
work->new_state = RDMA_CM_ROUTE_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
work->event.status = 0;
queue_work(cma_wq, &work->work);
return 0;
err2:
kfree(route->path_rec);
route->path_rec = NULL;
err1:
kfree(work);
return ret;
}
int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
return -EINVAL;
atomic_inc(&id_priv->refcount);
- switch (rdma_node_get_transport(id->device->node_type)) {
- case RDMA_TRANSPORT_IB:
- switch (rdma_port_get_link_layer(id->device, id->port_num)) {
- case IB_LINK_LAYER_INFINIBAND:
- ret = cma_resolve_ib_route(id_priv, timeout_ms);
- break;
- case IB_LINK_LAYER_ETHERNET:
- ret = cma_resolve_iboe_route(id_priv);
- break;
- default:
- ret = -ENOSYS;
- }
- break;
- case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
+ if (rdma_cap_ib_sa(id->device, id->port_num))
+ ret = cma_resolve_ib_route(id_priv, timeout_ms);
+ else if (rdma_protocol_roce(id->device, id->port_num))
+ ret = cma_resolve_iboe_route(id_priv);
+ else if (rdma_protocol_iwarp(id->device, id->port_num))
ret = cma_resolve_iw_route(id_priv, timeout_ms);
- break;
- default:
+ else
ret = -ENOSYS;
- break;
- }
+
if (ret)
goto err;
return 0;
err:
cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
cma_deref_id(id_priv);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_route);
int rdma_enable_apm(struct rdma_cm_id *id, enum alt_path_type alt_type)
{
/* APM is not supported yet */
return -EINVAL;
}
EXPORT_SYMBOL(rdma_enable_apm);
static int cma_bind_loopback(struct rdma_id_private *id_priv)
{
struct cma_device *cma_dev;
struct ib_port_attr port_attr;
union ib_gid gid;
u16 pkey;
int ret;
u8 p;
mutex_lock(&lock);
if (list_empty(&dev_list)) {
ret = -ENODEV;
goto out;
}
list_for_each_entry(cma_dev, &dev_list, list)
for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
if (!ib_query_port(cma_dev->device, p, &port_attr) &&
port_attr.state == IB_PORT_ACTIVE)
goto port_found;
p = 1;
cma_dev = list_entry(dev_list.next, struct cma_device, list);
port_found:
ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
if (ret)
goto out;
ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
if (ret)
goto out;
id_priv->id.route.addr.dev_addr.dev_type =
(rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ?
ARPHRD_INFINIBAND : ARPHRD_ETHER;
rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
out:
mutex_unlock(&lock);
return ret;
}
static void addr_handler(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *dev_addr, void *context)
{
struct rdma_id_private *id_priv = context;
struct rdma_cm_event event;
memset(&event, 0, sizeof event);
mutex_lock(&id_priv->handler_mutex);
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
RDMA_CM_ADDR_RESOLVED))
goto out;
memcpy(&id_priv->id.route.addr.src_addr, src_addr,
ip_addr_size(src_addr));
if (!status && !id_priv->cma_dev)
status = cma_acquire_dev(id_priv, NULL);
if (status) {
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
RDMA_CM_ADDR_BOUND))
goto out;
event.event = RDMA_CM_EVENT_ADDR_ERROR;
event.status = status;
} else
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
if (id_priv->id.event_handler(&id_priv->id, &event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
cma_deref_id(id_priv);
rdma_destroy_id(&id_priv->id);
return;
}
out:
mutex_unlock(&id_priv->handler_mutex);
cma_deref_id(id_priv);
}
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
{
struct cma_work *work;
struct sockaddr *src, *dst;
union ib_gid gid;
int ret;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
if (!id_priv->cma_dev) {
ret = cma_bind_loopback(id_priv);
if (ret)
goto err;
}
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
if (cma_zero_addr(src)) {
dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
if ((src->sa_family = dst->sa_family) == AF_INET) {
((struct sockaddr_in *)src)->sin_addr =
((struct sockaddr_in *)dst)->sin_addr;
} else {
((struct sockaddr_in6 *)src)->sin6_addr =
((struct sockaddr_in6 *)dst)->sin6_addr;
}
}
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
work->old_state = RDMA_CM_ADDR_QUERY;
work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
queue_work(cma_wq, &work->work);
return 0;
err:
kfree(work);
return ret;
}
static int cma_resolve_scif(struct rdma_id_private *id_priv)
{
struct cma_work *work;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
/* we probably can leave it empty here */
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
work->old_state = RDMA_CM_ADDR_QUERY;
work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
queue_work(cma_wq, &work->work);
return 0;
}
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
struct sockaddr *dst_addr)
{
if (!src_addr || !src_addr->sa_family) {
src_addr = (struct sockaddr *) &id->route.addr.src_addr;
src_addr->sa_family = dst_addr->sa_family;
#ifdef INET6
if (dst_addr->sa_family == AF_INET6) {
((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
}
#endif
}
if (!cma_any_addr(src_addr))
return rdma_bind_addr(id, src_addr);
else {
#if defined(INET6) || defined(INET)
union {
#ifdef INET
struct sockaddr_in in;
#endif
#ifdef INET6
struct sockaddr_in6 in6;
#endif
} addr;
#endif
switch(dst_addr->sa_family) {
#ifdef INET
case AF_INET:
memset(&addr.in, 0, sizeof(addr.in));
addr.in.sin_family = dst_addr->sa_family;
addr.in.sin_len = sizeof(addr.in);
return rdma_bind_addr(id, (struct sockaddr *)&addr.in);
#endif
#ifdef INET6
case AF_INET6:
memset(&addr.in6, 0, sizeof(addr.in6));
addr.in6.sin6_family = dst_addr->sa_family;
addr.in6.sin6_len = sizeof(addr.in6);
addr.in6.sin6_scope_id =
((struct sockaddr_in6 *)dst_addr)->sin6_scope_id;
return rdma_bind_addr(id, (struct sockaddr *)&addr.in6);
#endif
default:
return -EINVAL;
}
}
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
struct sockaddr *dst_addr, int timeout_ms)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
if (id_priv->state == RDMA_CM_IDLE) {
ret = cma_bind_addr(id, src_addr, dst_addr);
if (ret)
return ret;
}
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
return -EINVAL;
atomic_inc(&id_priv->refcount);
memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
if (cma_any_addr(dst_addr))
ret = cma_resolve_loopback(id_priv);
else if (id_priv->id.device &&
rdma_node_get_transport(id_priv->id.device->node_type) == RDMA_TRANSPORT_SCIF)
ret = cma_resolve_scif(id_priv);
else
ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr,
dst_addr, &id->route.addr.dev_addr,
timeout_ms, addr_handler, id_priv);
if (ret)
goto err;
return 0;
err:
cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
cma_deref_id(id_priv);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_addr);
int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
{
struct rdma_id_private *id_priv;
unsigned long flags;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irqsave(&id_priv->lock, flags);
if (id_priv->state == RDMA_CM_IDLE) {
id_priv->reuseaddr = reuse;
ret = 0;
} else {
ret = -EINVAL;
}
spin_unlock_irqrestore(&id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(rdma_set_reuseaddr);
int rdma_set_afonly(struct rdma_cm_id *id, int afonly)
{
struct rdma_id_private *id_priv;
unsigned long flags;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irqsave(&id_priv->lock, flags);
if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) {
id_priv->options |= (1 << CMA_OPTION_AFONLY);
id_priv->afonly = afonly;
ret = 0;
} else {
ret = -EINVAL;
}
spin_unlock_irqrestore(&id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(rdma_set_afonly);
static void cma_bind_port(struct rdma_bind_list *bind_list,
struct rdma_id_private *id_priv)
{
struct sockaddr_in *sin;
sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
sin->sin_port = htons(bind_list->port);
id_priv->bind_list = bind_list;
hlist_add_head(&id_priv->node, &bind_list->owners);
}
static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
unsigned short snum)
{
struct rdma_bind_list *bind_list;
int port, ret;
bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
if (!bind_list)
return -ENOMEM;
do {
ret = idr_get_new_above(ps, bind_list, snum, &port);
} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
if (ret)
goto err1;
if (port != snum) {
ret = -EADDRNOTAVAIL;
goto err2;
}
bind_list->ps = ps;
bind_list->port = (unsigned short) port;
cma_bind_port(bind_list, id_priv);
return 0;
err2:
idr_remove(ps, port);
err1:
kfree(bind_list);
return ret;
}
static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
{
static unsigned int last_used_port;
int low, high, remaining;
unsigned int rover;
inet_get_local_port_range(&init_net, &low, &high);
remaining = (high - low) + 1;
rover = random() % remaining + low;
retry:
if (last_used_port != rover &&
!idr_find(ps, (unsigned short) rover)) {
int ret = cma_alloc_port(ps, id_priv, rover);
/*
* Remember previously used port number in order to avoid
* re-using same port immediately after it is closed.
*/
if (!ret)
last_used_port = rover;
if (ret != -EADDRNOTAVAIL)
return ret;
}
if (--remaining) {
rover++;
if ((rover < low) || (rover > high))
rover = low;
goto retry;
}
return -EADDRNOTAVAIL;
}
/*
* Check that the requested port is available. This is called when trying to
* bind to a specific port, or when trying to listen on a bound port. In
* the latter case, the provided id_priv may already be on the bind_list, but
* we still need to check that it's okay to start listening.
*/
static int cma_check_port(struct rdma_bind_list *bind_list,
struct rdma_id_private *id_priv, uint8_t reuseaddr)
{
struct rdma_id_private *cur_id;
struct sockaddr *addr, *cur_addr;
addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
hlist_for_each_entry(cur_id, &bind_list->owners, node) {
if (id_priv == cur_id)
continue;
if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
cur_id->reuseaddr)
continue;
cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr;
if (id_priv->afonly && cur_id->afonly &&
(addr->sa_family != cur_addr->sa_family))
continue;
if (cma_any_addr(addr) || cma_any_addr(cur_addr))
return -EADDRNOTAVAIL;
if (!cma_addr_cmp(addr, cur_addr))
return -EADDRINUSE;
}
return 0;
}
static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
{
struct rdma_bind_list *bind_list;
unsigned short snum;
int ret;
snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr));
bind_list = idr_find(ps, snum);
if (!bind_list) {
ret = cma_alloc_port(ps, id_priv, snum);
} else {
ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr);
if (!ret)
cma_bind_port(bind_list, id_priv);
}
return ret;
}
static int cma_bind_listen(struct rdma_id_private *id_priv)
{
struct rdma_bind_list *bind_list = id_priv->bind_list;
int ret = 0;
mutex_lock(&lock);
if (bind_list->owners.first->next)
ret = cma_check_port(bind_list, id_priv, 0);
mutex_unlock(&lock);
return ret;
}
static int cma_get_tcp_port(struct rdma_id_private *id_priv)
{
int ret;
int size;
struct socket *sock;
ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret)
return ret;
#ifdef __linux__
ret = sock->ops->bind(sock,
(struct sockaddr *) &id_priv->id.route.addr.src_addr,
ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
#else
+ SOCK_LOCK(sock);
+ sock->so_options |= SO_REUSEADDR;
+ SOCK_UNLOCK(sock);
+
ret = -sobind(sock,
(struct sockaddr *)&id_priv->id.route.addr.src_addr,
curthread);
#endif
if (ret) {
sock_release(sock);
return ret;
}
size = ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr);
ret = sock_getname(sock,
(struct sockaddr *) &id_priv->id.route.addr.src_addr,
&size, 0);
if (ret) {
sock_release(sock);
return ret;
}
id_priv->sock = sock;
return 0;
}
static int cma_get_port(struct rdma_id_private *id_priv)
{
+ struct cma_device *cma_dev;
struct idr *ps;
int ret;
switch (id_priv->id.ps) {
case RDMA_PS_SDP:
ps = &sdp_ps;
break;
case RDMA_PS_TCP:
ps = &tcp_ps;
- if (unify_tcp_port_space) {
+
+ mutex_lock(&lock);
+ /* check if there are any iWarp IB devices present */
+ list_for_each_entry(cma_dev, &dev_list, list) {
+ if (rdma_protocol_iwarp(cma_dev->device, 1)) {
+ id_priv->unify_ps_tcp = 1;
+ break;
+ }
+ }
+ mutex_unlock(&lock);
+
+ if (id_priv->unify_ps_tcp) {
ret = cma_get_tcp_port(id_priv);
if (ret)
goto out;
}
break;
case RDMA_PS_UDP:
ps = &udp_ps;
break;
case RDMA_PS_IPOIB:
ps = &ipoib_ps;
break;
case RDMA_PS_IB:
ps = &ib_ps;
break;
default:
return -EPROTONOSUPPORT;
}
mutex_lock(&lock);
if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr))
ret = cma_alloc_any_port(ps, id_priv);
else
ret = cma_use_port(ps, id_priv);
mutex_unlock(&lock);
out:
return ret;
}
static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
struct sockaddr *addr)
{
#if defined(INET6)
struct sockaddr_in6 *sin6;
if (addr->sa_family != AF_INET6)
return 0;
sin6 = (struct sockaddr_in6 *) addr;
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) &&
!sin6->sin6_scope_id)
return -EINVAL;
dev_addr->bound_dev_if = sin6->sin6_scope_id;
#endif
return 0;
}
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
if (id_priv->state == RDMA_CM_IDLE) {
((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
if (ret)
return ret;
}
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
return -EINVAL;
if (id_priv->reuseaddr) {
ret = cma_bind_listen(id_priv);
if (ret)
goto err;
}
id_priv->backlog = backlog;
if (id->device) {
- switch (rdma_node_get_transport(id->device->node_type)) {
- case RDMA_TRANSPORT_IB:
+ if (rdma_cap_ib_cm(id->device, 1)) {
ret = cma_ib_listen(id_priv);
if (ret)
goto err;
- break;
- case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
+ } else if (rdma_cap_iw_cm(id->device, 1)) {
ret = cma_iw_listen(id_priv, backlog);
if (ret)
goto err;
- break;
- default:
+ } else {
ret = -ENOSYS;
goto err;
}
} else
cma_listen_on_all(id_priv);
return 0;
err:
id_priv->backlog = 0;
cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
return ret;
}
EXPORT_SYMBOL(rdma_listen);
int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
int ret;
#if defined(INET6)
int ipv6only;
size_t var_size = sizeof(int);
#endif
if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
return -EAFNOSUPPORT;
id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
return -EINVAL;
ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
if (ret)
goto err1;
memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
if (!cma_any_addr(addr)) {
ret = rdma_translate_ip(addr, &id->route.addr.dev_addr, NULL);
if (ret)
goto err1;
ret = cma_acquire_dev(id_priv, NULL);
if (ret)
goto err1;
}
if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
if (addr->sa_family == AF_INET)
id_priv->afonly = 1;
#if defined(INET6)
else if (addr->sa_family == AF_INET6)
id_priv->afonly = kernel_sysctlbyname(&thread0, "net.inet6.ip6.v6only",
&ipv6only, &var_size, NULL, 0, NULL, 0);
#endif
}
ret = cma_get_port(id_priv);
if (ret)
goto err2;
return 0;
err2:
if (id_priv->cma_dev)
cma_release_dev(id_priv);
err1:
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
}
EXPORT_SYMBOL(rdma_bind_addr);
static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
struct rdma_route *route)
{
struct cma_hdr *cma_hdr;
struct sdp_hh *sdp_hdr;
if (route->addr.src_addr.ss_family == AF_INET) {
struct sockaddr_in *src4, *dst4;
src4 = (struct sockaddr_in *) &route->addr.src_addr;
dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
switch (ps) {
case RDMA_PS_SDP:
sdp_hdr = hdr;
if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
return -EINVAL;
sdp_set_ip_ver(sdp_hdr, 4);
sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
sdp_hdr->port = src4->sin_port;
break;
default:
cma_hdr = hdr;
cma_hdr->cma_version = CMA_VERSION;
cma_set_ip_ver(cma_hdr, 4);
cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
cma_hdr->port = src4->sin_port;
break;
}
} else {
struct sockaddr_in6 *src6, *dst6;
src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
switch (ps) {
case RDMA_PS_SDP:
sdp_hdr = hdr;
if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
return -EINVAL;
sdp_set_ip_ver(sdp_hdr, 6);
sdp_hdr->src_addr.ip6 = src6->sin6_addr;
sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
sdp_hdr->port = src6->sin6_port;
break;
default:
cma_hdr = hdr;
cma_hdr->cma_version = CMA_VERSION;
cma_set_ip_ver(cma_hdr, 6);
cma_hdr->src_addr.ip6 = src6->sin6_addr;
cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
cma_hdr->port = src6->sin6_port;
break;
}
}
return 0;
}
static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
struct ib_cm_event *ib_event)
{
struct rdma_id_private *id_priv = cm_id->context;
struct rdma_cm_event event;
struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
int ret = 0;
if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
return 0;
memset(&event, 0, sizeof event);
switch (ib_event->event) {
case IB_CM_SIDR_REQ_ERROR:
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = -ETIMEDOUT;
break;
case IB_CM_SIDR_REP_RECEIVED:
event.param.ud.private_data = ib_event->private_data;
event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
if (rep->status != IB_SIDR_SUCCESS) {
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = ib_event->param.sidr_rep_rcvd.status;
break;
}
ret = cma_set_qkey(id_priv);
if (ret) {
event.event = RDMA_CM_EVENT_ADDR_ERROR;
event.status = -EINVAL;
break;
}
if (id_priv->qkey != rep->qkey) {
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = -EINVAL;
break;
}
ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
id_priv->id.route.path_rec,
&event.param.ud.ah_attr);
event.param.ud.qp_num = rep->qpn;
event.param.ud.qkey = rep->qkey;
event.event = RDMA_CM_EVENT_ESTABLISHED;
event.status = 0;
break;
default:
printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
ib_event->event);
goto out;
}
ret = id_priv->id.event_handler(&id_priv->id, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
return ret;
}
out:
mutex_unlock(&id_priv->handler_mutex);
return ret;
}
static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
struct ib_cm_sidr_req_param req;
struct rdma_route *route;
struct ib_cm_id *id;
int ret;
req.private_data_len = sizeof(struct cma_hdr) +
conn_param->private_data_len;
if (req.private_data_len < conn_param->private_data_len)
return -EINVAL;
req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
if (!req.private_data)
return -ENOMEM;
if (conn_param->private_data && conn_param->private_data_len)
memcpy((void *) req.private_data + sizeof(struct cma_hdr),
conn_param->private_data, conn_param->private_data_len);
route = &id_priv->id.route;
ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
if (ret)
goto out;
id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
id_priv);
if (IS_ERR(id)) {
ret = PTR_ERR(id);
goto out;
}
id_priv->cm_id.ib = id;
req.path = route->path_rec;
req.service_id = cma_get_service_id(id_priv->id.ps,
(struct sockaddr *) &route->addr.dst_addr);
req.timeout_ms = 1 << (cma_response_timeout - 8);
req.max_cm_retries = CMA_MAX_CM_RETRIES;
cma_dbg(id_priv, "sending SIDR\n");
ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
if (ret) {
ib_destroy_cm_id(id_priv->cm_id.ib);
id_priv->cm_id.ib = NULL;
}
out:
kfree(req.private_data);
return ret;
}
static int cma_connect_ib(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
struct ib_cm_req_param req;
struct rdma_route *route;
void *private_data;
struct ib_cm_id *id;
int offset, ret;
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv->id.ps);
req.private_data_len = offset + conn_param->private_data_len;
if (req.private_data_len < conn_param->private_data_len)
return -EINVAL;
private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
if (!private_data)
return -ENOMEM;
if (conn_param->private_data && conn_param->private_data_len)
memcpy(private_data + offset, conn_param->private_data,
conn_param->private_data_len);
id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv);
if (IS_ERR(id)) {
ret = PTR_ERR(id);
goto out;
}
id_priv->cm_id.ib = id;
route = &id_priv->id.route;
ret = cma_format_hdr(private_data, id_priv->id.ps, route);
if (ret)
goto out;
req.private_data = private_data;
req.primary_path = &route->path_rec[0];
if (route->num_paths == 2)
req.alternate_path = &route->path_rec[1];
req.service_id = cma_get_service_id(id_priv->id.ps,
(struct sockaddr *) &route->addr.dst_addr);
req.qp_num = id_priv->qp_num;
req.qp_type = id_priv->id.qp_type;
req.starting_psn = id_priv->seq_num;
req.responder_resources = conn_param->responder_resources;
req.initiator_depth = conn_param->initiator_depth;
req.flow_control = conn_param->flow_control;
req.retry_count = min_t(u8, 7, conn_param->retry_count);
req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
req.remote_cm_response_timeout = cma_response_timeout;
req.local_cm_response_timeout = cma_response_timeout;
req.max_cm_retries = CMA_MAX_CM_RETRIES;
req.srq = id_priv->srq ? 1 : 0;
cma_dbg(id_priv, "sending REQ\n");
ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
out:
if (ret && !IS_ERR(id)) {
ib_destroy_cm_id(id);
id_priv->cm_id.ib = NULL;
}
kfree(private_data);
return ret;
}
static int cma_connect_iw(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
struct iw_cm_id *cm_id;
struct sockaddr_in* sin;
int ret;
struct iw_cm_conn_param iw_param;
cm_id = iw_create_cm_id(id_priv->id.device, id_priv->sock,
cma_iw_handler, id_priv);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
id_priv->cm_id.iw = cm_id;
sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
cm_id->local_addr = *sin;
sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
cm_id->remote_addr = *sin;
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
goto out;
if (conn_param) {
iw_param.ord = conn_param->initiator_depth;
iw_param.ird = conn_param->responder_resources;
iw_param.private_data = conn_param->private_data;
iw_param.private_data_len = conn_param->private_data_len;
iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
} else {
memset(&iw_param, 0, sizeof iw_param);
iw_param.qpn = id_priv->qp_num;
}
ret = iw_cm_connect(cm_id, &iw_param);
out:
if (ret) {
iw_destroy_cm_id(cm_id);
id_priv->cm_id.iw = NULL;
}
return ret;
}
int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
return -EINVAL;
if (!id->qp) {
id_priv->qp_num = conn_param->qp_num;
id_priv->srq = conn_param->srq;
}
- switch (rdma_node_get_transport(id->device->node_type)) {
- case RDMA_TRANSPORT_IB:
+ if (rdma_cap_ib_cm(id->device, id->port_num)) {
if (id->qp_type == IB_QPT_UD)
ret = cma_resolve_ib_udp(id_priv, conn_param);
else
ret = cma_connect_ib(id_priv, conn_param);
- break;
- case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
+ } else if (rdma_cap_iw_cm(id->device, id->port_num))
ret = cma_connect_iw(id_priv, conn_param);
- break;
- default:
+ else
ret = -ENOSYS;
- break;
- }
if (ret)
goto err;
return 0;
err:
cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
return ret;
}
EXPORT_SYMBOL(rdma_connect);
static int cma_accept_ib(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
struct ib_cm_rep_param rep;
int ret;
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
goto out;
ret = cma_modify_qp_rts(id_priv, conn_param);
if (ret)
goto out;
memset(&rep, 0, sizeof rep);
rep.qp_num = id_priv->qp_num;
rep.starting_psn = id_priv->seq_num;
rep.private_data = conn_param->private_data;
rep.private_data_len = conn_param->private_data_len;
rep.responder_resources = conn_param->responder_resources;
rep.initiator_depth = conn_param->initiator_depth;
rep.failover_accepted = 0;
rep.flow_control = conn_param->flow_control;
rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
rep.srq = id_priv->srq ? 1 : 0;
cma_dbg(id_priv, "sending REP\n");
ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
out:
return ret;
}
static int cma_accept_iw(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
struct iw_cm_conn_param iw_param;
int ret;
if (!conn_param)
return -EINVAL;
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
return ret;
iw_param.ord = conn_param->initiator_depth;
iw_param.ird = conn_param->responder_resources;
iw_param.private_data = conn_param->private_data;
iw_param.private_data_len = conn_param->private_data_len;
if (id_priv->id.qp) {
iw_param.qpn = id_priv->qp_num;
} else
iw_param.qpn = conn_param->qp_num;
return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
}
static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
enum ib_cm_sidr_status status,
const void *private_data, int private_data_len)
{
struct ib_cm_sidr_rep_param rep;
int ret;
memset(&rep, 0, sizeof rep);
rep.status = status;
if (status == IB_SIDR_SUCCESS) {
ret = cma_set_qkey(id_priv);
if (ret)
return ret;
rep.qp_num = id_priv->qp_num;
rep.qkey = id_priv->qkey;
}
rep.private_data = private_data;
rep.private_data_len = private_data_len;
cma_dbg(id_priv, "sending SIDR\n");
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->owner = curthread->td_proc->p_pid;
if (!cma_comp(id_priv, RDMA_CM_CONNECT))
return -EINVAL;
if (!id->qp && conn_param) {
id_priv->qp_num = conn_param->qp_num;
id_priv->srq = conn_param->srq;
}
- switch (rdma_node_get_transport(id->device->node_type)) {
- case RDMA_TRANSPORT_IB:
+ if (rdma_cap_ib_cm(id->device, id->port_num)) {
if (id->qp_type == IB_QPT_UD) {
if (conn_param)
ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
conn_param->private_data,
conn_param->private_data_len);
else
ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
NULL, 0);
} else {
if (conn_param)
ret = cma_accept_ib(id_priv, conn_param);
else
ret = cma_rep_recv(id_priv);
}
- break;
- case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
+ } else if (rdma_cap_iw_cm(id->device, id->port_num))
ret = cma_accept_iw(id_priv, conn_param);
- break;
- default:
+ else
ret = -ENOSYS;
- break;
- }
if (ret)
goto reject;
return 0;
reject:
cma_modify_qp_err(id_priv);
rdma_reject(id, NULL, 0);
return ret;
}
EXPORT_SYMBOL(rdma_accept);
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
if (!id_priv->cm_id.ib)
return -EINVAL;
switch (id->device->node_type) {
case RDMA_NODE_IB_CA:
ret = ib_cm_notify(id_priv->cm_id.ib, event);
break;
default:
ret = 0;
break;
}
return ret;
}
EXPORT_SYMBOL(rdma_notify);
int rdma_reject(struct rdma_cm_id *id, const void *private_data,
u8 private_data_len)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
if (!id_priv->cm_id.ib)
return -EINVAL;
- switch (rdma_node_get_transport(id->device->node_type)) {
- case RDMA_TRANSPORT_IB:
+ if (rdma_cap_ib_cm(id->device, id->port_num)) {
if (id->qp_type == IB_QPT_UD)
ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
private_data, private_data_len);
else {
cma_dbg(id_priv, "sending REJ\n");
ret = ib_send_cm_rej(id_priv->cm_id.ib,
IB_CM_REJ_CONSUMER_DEFINED, NULL,
0, private_data, private_data_len);
}
- break;
- case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
+ } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_reject(id_priv->cm_id.iw,
private_data, private_data_len);
- break;
- default:
+ } else
ret = -ENOSYS;
- break;
- }
+
return ret;
}
EXPORT_SYMBOL(rdma_reject);
int rdma_disconnect(struct rdma_cm_id *id)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
if (!id_priv->cm_id.ib)
return -EINVAL;
- switch (rdma_node_get_transport(id->device->node_type)) {
- case RDMA_TRANSPORT_IB:
+ if (rdma_cap_ib_cm(id->device, id->port_num)) {
ret = cma_modify_qp_err(id_priv);
if (ret)
goto out;
/* Initiate or respond to a disconnect. */
cma_dbg(id_priv, "sending DREQ\n");
if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) {
cma_dbg(id_priv, "sending DREP\n");
ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
}
- break;
- case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_SCIF:
+ } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
- break;
- default:
+ } else
ret = -EINVAL;
- break;
- }
+
out:
return ret;
}
EXPORT_SYMBOL(rdma_disconnect);
static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
{
struct rdma_id_private *id_priv;
struct cma_multicast *mc = multicast->context;
struct rdma_cm_event event;
struct rdma_dev_addr *dev_addr;
int ret;
struct net_device *ndev = NULL;
u16 vlan;
id_priv = mc->id_priv;
dev_addr = &id_priv->id.route.addr.dev_addr;
if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) &&
cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
return 0;
mutex_lock(&id_priv->qp_mutex);
if (!status && id_priv->id.qp)
status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
be16_to_cpu(multicast->rec.mlid));
mutex_unlock(&id_priv->qp_mutex);
memset(&event, 0, sizeof event);
event.status = status;
event.param.ud.private_data = mc->context;
ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
if (!ndev) {
status = -ENODEV;
} else {
vlan = rdma_vlan_dev_vlan_id(ndev);
dev_put(ndev);
}
if (!status) {
event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
ib_init_ah_from_mcmember(id_priv->id.device,
id_priv->id.port_num, &multicast->rec,
&event.param.ud.ah_attr);
event.param.ud.ah_attr.vlan_id = vlan;
event.param.ud.qp_num = 0xFFFFFF;
event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
} else {
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
/* mark that the cached record is no longer valid */
if (status != -ENETRESET && status != -EAGAIN) {
spin_lock(&id_priv->lock);
id_priv->is_valid_rec = 0;
spin_unlock(&id_priv->lock);
}
}
ret = id_priv->id.event_handler(&id_priv->id, &event);
if (ret) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
return 0;
}
mutex_unlock(&id_priv->handler_mutex);
return 0;
}
static void cma_set_mgid(struct rdma_id_private *id_priv,
struct sockaddr *addr, union ib_gid *mgid)
{
unsigned char mc_map[MAX_ADDR_LEN];
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct sockaddr_in *sin = (struct sockaddr_in *) addr;
#if defined(INET6)
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
#endif
if (cma_any_addr(addr)) {
memset(mgid, 0, sizeof *mgid);
#if defined(INET6)
} else if ((addr->sa_family == AF_INET6) &&
((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else if (addr->sa_family == AF_INET6) {
ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
*mgid = *(union ib_gid *) (mc_map + 4);
#endif
} else {
ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
*mgid = *(union ib_gid *) (mc_map + 4);
}
}
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
struct ib_sa_mcmember_rec rec;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
ib_sa_comp_mask comp_mask;
int ret = 0;
ib_addr_get_mgid(dev_addr, &id_priv->rec.mgid);
/* cache ipoib bc record */
spin_lock(&id_priv->lock);
if (!id_priv->is_valid_rec)
ret = ib_sa_get_mcmember_rec(id_priv->id.device,
id_priv->id.port_num,
&id_priv->rec.mgid,
&id_priv->rec);
if (ret) {
id_priv->is_valid_rec = 0;
spin_unlock(&id_priv->lock);
return ret;
} else {
rec = id_priv->rec;
id_priv->is_valid_rec = 1;
}
spin_unlock(&id_priv->lock);
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
if (id_priv->id.ps == RDMA_PS_UDP)
rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
rdma_addr_get_sgid(dev_addr, &rec.port_gid);
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = 1;
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
IB_SA_MCMEMBER_REC_FLOW_LABEL |
IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
if (id_priv->id.ps == RDMA_PS_IPOIB)
comp_mask |= IB_SA_MCMEMBER_REC_RATE |
IB_SA_MCMEMBER_REC_RATE_SELECTOR |
IB_SA_MCMEMBER_REC_MTU_SELECTOR |
IB_SA_MCMEMBER_REC_MTU |
IB_SA_MCMEMBER_REC_HOP_LIMIT;
mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
id_priv->id.port_num, &rec,
comp_mask, GFP_KERNEL,
cma_ib_mc_handler, mc);
return PTR_RET(mc->multicast.ib);
}
static void iboe_mcast_work_handler(struct work_struct *work)
{
struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
struct cma_multicast *mc = mw->mc;
struct ib_sa_multicast *m = mc->multicast.ib;
mc->multicast.ib->context = mc;
cma_ib_mc_handler(0, m);
kref_put(&mc->mcref, release_mc);
kfree(mw);
}
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
{
struct sockaddr_in *sin = (struct sockaddr_in *)addr;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
if (cma_any_addr(addr)) {
memset(mgid, 0, sizeof *mgid);
} else if (addr->sa_family == AF_INET6) {
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else {
mgid->raw[0] = 0xff;
mgid->raw[1] = 0x0e;
mgid->raw[2] = 0;
mgid->raw[3] = 0;
mgid->raw[4] = 0;
mgid->raw[5] = 0;
mgid->raw[6] = 0;
mgid->raw[7] = 0;
mgid->raw[8] = 0;
mgid->raw[9] = 0;
mgid->raw[10] = 0xff;
mgid->raw[11] = 0xff;
*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
}
}
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
struct iboe_mcast_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int err;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
if (cma_zero_addr((struct sockaddr *)&mc->addr))
return -EINVAL;
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
if (!mc->multicast.ib) {
err = -ENOMEM;
goto out1;
}
cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
if (id_priv->id.ps == RDMA_PS_UDP)
mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
if (!ndev) {
err = -ENODEV;
goto out2;
}
mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
mc->multicast.ib->rec.hop_limit = 1;
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu);
dev_put(ndev);
if (!mc->multicast.ib->rec.mtu) {
err = -EINVAL;
goto out2;
}
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&mc->multicast.ib->rec.port_gid);
work->id = id_priv;
work->mc = mc;
INIT_WORK(&work->work, iboe_mcast_work_handler);
kref_get(&mc->mcref);
queue_work(cma_wq, &work->work);
return 0;
out2:
kfree(mc->multicast.ib);
out1:
kfree(work);
return err;
}
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
void *context)
{
struct rdma_id_private *id_priv;
struct cma_multicast *mc;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
!cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
return -EINVAL;
mc = kmalloc(sizeof *mc, GFP_KERNEL);
if (!mc)
return -ENOMEM;
memcpy(&mc->addr, addr, ip_addr_size(addr));
mc->context = context;
mc->id_priv = id_priv;
spin_lock(&id_priv->lock);
list_add(&mc->list, &id_priv->mc_list);
spin_unlock(&id_priv->lock);
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
switch (rdma_port_get_link_layer(id->device, id->port_num)) {
case IB_LINK_LAYER_INFINIBAND:
ret = cma_join_ib_multicast(id_priv, mc);
break;
case IB_LINK_LAYER_ETHERNET:
kref_init(&mc->mcref);
ret = cma_iboe_join_multicast(id_priv, mc);
break;
default:
ret = -EINVAL;
}
break;
default:
ret = -ENOSYS;
break;
}
if (ret) {
spin_lock_irq(&id_priv->lock);
list_del(&mc->list);
spin_unlock_irq(&id_priv->lock);
kfree(mc);
}
return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);
void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
struct cma_multicast *mc;
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irq(&id_priv->lock);
list_for_each_entry(mc, &id_priv->mc_list, list) {
if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
list_del(&mc->list);
spin_unlock_irq(&id_priv->lock);
if (id->qp)
ib_detach_mcast(id->qp,
&mc->multicast.ib->rec.mgid,
be16_to_cpu(mc->multicast.ib->rec.mlid));
if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
switch (rdma_port_get_link_layer(id->device, id->port_num)) {
case IB_LINK_LAYER_INFINIBAND:
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
break;
case IB_LINK_LAYER_ETHERNET:
kref_put(&mc->mcref, release_mc);
break;
default:
break;
}
}
return;
}
}
spin_unlock_irq(&id_priv->lock);
}
EXPORT_SYMBOL(rdma_leave_multicast);
static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr;
struct cma_ndev_work *work;
dev_addr = &id_priv->id.route.addr.dev_addr;
if ((dev_addr->bound_dev_if == ndev->if_index) &&
memcmp(dev_addr->src_dev_addr, IF_LLADDR(ndev), ndev->if_addrlen)) {
printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
ndev->if_xname, &id_priv->id);
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
INIT_WORK(&work->work, cma_ndev_work_handler);
work->id = id_priv;
work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
atomic_inc(&id_priv->refcount);
queue_work(cma_wq, &work->work);
}
return 0;
}
static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
struct net_device *ndev = (struct net_device *)ctx;
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
int ret = NOTIFY_DONE;
/* BONDING related, commented out until the bonding is resolved */
#if 0
if (dev_net(ndev) != &init_net)
return NOTIFY_DONE;
if (event != NETDEV_BONDING_FAILOVER)
return NOTIFY_DONE;
if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
return NOTIFY_DONE;
#endif
if (event != NETDEV_DOWN && event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list)
list_for_each_entry(id_priv, &cma_dev->id_list, list) {
ret = cma_netdev_change(ndev, id_priv);
if (ret)
goto out;
}
out:
mutex_unlock(&lock);
return ret;
}
static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};
static void cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
if (!cma_dev)
return;
cma_dev->device = device;
init_completion(&cma_dev->comp);
atomic_set(&cma_dev->refcount, 1);
INIT_LIST_HEAD(&cma_dev->id_list);
ib_set_client_data(device, &cma_client, cma_dev);
mutex_lock(&lock);
list_add_tail(&cma_dev->list, &dev_list);
list_for_each_entry(id_priv, &listen_any_list, list)
cma_listen_on_dev(id_priv, cma_dev);
mutex_unlock(&lock);
}
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
struct rdma_cm_event event;
enum rdma_cm_state state;
int ret = 0;
/* Record that we want to remove the device */
state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
if (state == RDMA_CM_DESTROYING)
return 0;
cma_cancel_operation(id_priv, state);
mutex_lock(&id_priv->handler_mutex);
/* Check for destruction from another callback. */
if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
goto out;
memset(&event, 0, sizeof event);
event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
ret = id_priv->id.event_handler(&id_priv->id, &event);
out:
mutex_unlock(&id_priv->handler_mutex);
return ret;
}
static void cma_process_remove(struct cma_device *cma_dev)
{
struct rdma_id_private *id_priv;
int ret;
mutex_lock(&lock);
while (!list_empty(&cma_dev->id_list)) {
id_priv = list_entry(cma_dev->id_list.next,
struct rdma_id_private, list);
list_del(&id_priv->listen_list);
list_del_init(&id_priv->list);
atomic_inc(&id_priv->refcount);
mutex_unlock(&lock);
ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
cma_deref_id(id_priv);
if (ret)
rdma_destroy_id(&id_priv->id);
mutex_lock(&lock);
}
mutex_unlock(&lock);
cma_deref_dev(cma_dev);
wait_for_completion(&cma_dev->comp);
}
static void cma_remove_one(struct ib_device *device)
{
struct cma_device *cma_dev;
cma_dev = ib_get_client_data(device, &cma_client);
if (!cma_dev)
return;
mutex_lock(&lock);
list_del(&cma_dev->list);
mutex_unlock(&lock);
cma_process_remove(cma_dev);
kfree(cma_dev);
}
static int __init cma_init(void)
{
int ret = -ENOMEM;
cma_wq = create_singlethread_workqueue("rdma_cm");
if (!cma_wq)
return -ENOMEM;
cma_free_wq = create_singlethread_workqueue("rdma_cm_fr");
if (!cma_free_wq)
goto err1;
ib_sa_register_client(&sa_client);
rdma_addr_register_client(&addr_client);
register_netdevice_notifier(&cma_nb);
ret = ib_register_client(&cma_client);
if (ret)
goto err;
return 0;
err:
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
destroy_workqueue(cma_free_wq);
err1:
destroy_workqueue(cma_wq);
return ret;
}
static void __exit cma_cleanup(void)
{
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
flush_workqueue(cma_free_wq);
destroy_workqueue(cma_free_wq);
destroy_workqueue(cma_wq);
idr_destroy(&sdp_ps);
idr_destroy(&tcp_ps);
idr_destroy(&udp_ps);
idr_destroy(&ipoib_ps);
idr_destroy(&ib_ps);
}
module_init(cma_init);
module_exit(cma_cleanup);
diff --git a/sys/ofed/drivers/infiniband/core/device.c b/sys/ofed/drivers/infiniband/core/device.c
index a3aee411e39d..bb580edf0040 100644
--- a/sys/ofed/drivers/infiniband/core/device.c
+++ b/sys/ofed/drivers/infiniband/core/device.c
@@ -1,793 +1,791 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include "core_priv.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("core kernel InfiniBand API");
MODULE_LICENSE("Dual BSD/GPL");
struct ib_client_data {
struct list_head list;
struct ib_client *client;
void * data;
};
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
static LIST_HEAD(device_list);
static LIST_HEAD(client_list);
/*
* device_mutex protects access to both device_list and client_list.
* There's no real point to using multiple locks or something fancier
* like an rwsem: we always access both lists, and we're always
* modifying one list or the other list. In any case this is not a
* hot path so there's no point in trying to optimize.
*/
static DEFINE_MUTEX(device_mutex);
static int ib_device_check_mandatory(struct ib_device *device)
{
#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x }
static const struct {
size_t offset;
char *name;
} mandatory_table[] = {
IB_MANDATORY_FUNC(query_device),
IB_MANDATORY_FUNC(query_port),
IB_MANDATORY_FUNC(query_pkey),
IB_MANDATORY_FUNC(query_gid),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
IB_MANDATORY_FUNC(create_ah),
IB_MANDATORY_FUNC(destroy_ah),
IB_MANDATORY_FUNC(create_qp),
IB_MANDATORY_FUNC(modify_qp),
IB_MANDATORY_FUNC(destroy_qp),
IB_MANDATORY_FUNC(post_send),
IB_MANDATORY_FUNC(post_recv),
IB_MANDATORY_FUNC(create_cq),
IB_MANDATORY_FUNC(destroy_cq),
IB_MANDATORY_FUNC(poll_cq),
IB_MANDATORY_FUNC(req_notify_cq),
IB_MANDATORY_FUNC(get_dma_mr),
- IB_MANDATORY_FUNC(dereg_mr)
+ IB_MANDATORY_FUNC(dereg_mr),
+ IB_MANDATORY_FUNC(get_port_immutable)
};
int i;
for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
device->name, mandatory_table[i].name);
return -EINVAL;
}
}
return 0;
}
static struct ib_device *__ib_device_get_by_name(const char *name)
{
struct ib_device *device;
list_for_each_entry(device, &device_list, core_list)
if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
return device;
return NULL;
}
static int alloc_name(char *name)
{
unsigned long *inuse;
char buf[IB_DEVICE_NAME_MAX];
struct ib_device *device;
int i;
inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL);
if (!inuse)
return -ENOMEM;
list_for_each_entry(device, &device_list, core_list) {
if (!sscanf(device->name, name, &i))
continue;
if (i < 0 || i >= PAGE_SIZE * 8)
continue;
snprintf(buf, sizeof buf, name, i);
if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
set_bit(i, inuse);
}
i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
free_page((unsigned long) inuse);
snprintf(buf, sizeof buf, name, i);
if (__ib_device_get_by_name(buf))
return -ENFILE;
strlcpy(name, buf, IB_DEVICE_NAME_MAX);
return 0;
}
-static int start_port(struct ib_device *device)
+static int rdma_start_port(struct ib_device *device)
{
return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
}
-static int end_port(struct ib_device *device)
+static int rdma_end_port(struct ib_device *device)
{
return (device->node_type == RDMA_NODE_IB_SWITCH) ?
0 : device->phys_port_cnt;
}
/**
* ib_alloc_device - allocate an IB device struct
* @size:size of structure to allocate
*
* Low-level drivers should use ib_alloc_device() to allocate &struct
* ib_device. @size is the size of the structure to be allocated,
* including any private data used by the low-level driver.
* ib_dealloc_device() must be used to free structures allocated with
* ib_alloc_device().
*/
struct ib_device *ib_alloc_device(size_t size)
{
struct ib_device *dev;
BUG_ON(size < sizeof (struct ib_device));
dev = kzalloc(size, GFP_KERNEL);
spin_lock_init(&dev->cmd_perf_lock);
return dev;
}
EXPORT_SYMBOL(ib_alloc_device);
/**
* ib_dealloc_device - free an IB device struct
* @device:structure to free
*
* Free a structure allocated with ib_alloc_device().
*/
void ib_dealloc_device(struct ib_device *device)
{
if (device->reg_state == IB_DEV_UNINITIALIZED) {
+ kfree(device->port_immutable);
kfree(device);
return;
}
BUG_ON(device->reg_state != IB_DEV_UNREGISTERED);
kobject_put(&device->dev.kobj);
}
EXPORT_SYMBOL(ib_dealloc_device);
static int add_client_context(struct ib_device *device, struct ib_client *client)
{
struct ib_client_data *context;
unsigned long flags;
context = kmalloc(sizeof *context, GFP_KERNEL);
if (!context) {
printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n",
device->name, client->name);
return -ENOMEM;
}
context->client = client;
context->data = NULL;
spin_lock_irqsave(&device->client_data_lock, flags);
list_add(&context->list, &device->client_data_list);
spin_unlock_irqrestore(&device->client_data_lock, flags);
return 0;
}
-static int read_port_table_lengths(struct ib_device *device)
+static int verify_immutable(const struct ib_device *dev, u8 port)
{
- struct ib_port_attr *tprops = NULL;
- int num_ports, ret = -ENOMEM;
- u8 port_index;
-
- tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
- if (!tprops)
- goto out;
-
- num_ports = end_port(device) - start_port(device) + 1;
+ return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
+ rdma_max_mad_size(dev, port) != 0);
+}
- device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports,
- GFP_KERNEL);
- device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports,
- GFP_KERNEL);
- if (!device->pkey_tbl_len || !device->gid_tbl_len)
- goto err;
+static int read_port_immutable(struct ib_device *device)
+{
+ int ret;
+ u8 start_port = rdma_start_port(device);
+ u8 end_port = rdma_end_port(device);
+ u8 port;
+
+ /**
+ * device->port_immutable is indexed directly by the port number to make
+ * access to this data as efficient as possible.
+ *
+ * Therefore port_immutable is declared as a 1 based array with
+ * potential empty slots at the beginning.
+ */
+ device->port_immutable = kzalloc(sizeof(*device->port_immutable)
+ * (end_port + 1),
+ GFP_KERNEL);
+ if (!device->port_immutable)
+ return -ENOMEM;
- for (port_index = 0; port_index < num_ports; ++port_index) {
- ret = ib_query_port(device, port_index + start_port(device),
- tprops);
+ for (port = start_port; port <= end_port; ++port) {
+ ret = device->get_port_immutable(device, port,
+ &device->port_immutable[port]);
if (ret)
- goto err;
- device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len;
- device->gid_tbl_len[port_index] = tprops->gid_tbl_len;
- }
-
- ret = 0;
- goto out;
+ return ret;
-err:
- kfree(device->gid_tbl_len);
- kfree(device->pkey_tbl_len);
-out:
- kfree(tprops);
- return ret;
+ if (verify_immutable(device, port))
+ return -EINVAL;
+ }
+ return 0;
}
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
*
* Low-level drivers use ib_register_device() to register their
* devices with the IB core. All registered clients will receive a
* callback for each device that is added. @device must be allocated
* with ib_alloc_device().
*/
int ib_register_device(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
{
int ret;
mutex_lock(&device_mutex);
if (strchr(device->name, '%')) {
ret = alloc_name(device->name);
if (ret)
goto out;
}
if (ib_device_check_mandatory(device)) {
ret = -EINVAL;
goto out;
}
INIT_LIST_HEAD(&device->event_handler_list);
INIT_LIST_HEAD(&device->client_data_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
- ret = read_port_table_lengths(device);
+
+ ret = read_port_immutable(device);
if (ret) {
- printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n",
- device->name);
+ printk(KERN_WARNING "Couldn't create per port immutable data %s\n",
+ device->name);
goto out;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
device->name);
- kfree(device->gid_tbl_len);
- kfree(device->pkey_tbl_len);
+ kfree(device->port_immutable);
goto out;
}
list_add_tail(&device->core_list, &device_list);
device->reg_state = IB_DEV_REGISTERED;
{
struct ib_client *client;
list_for_each_entry(client, &client_list, list)
if (client->add && !add_client_context(device, client))
client->add(device);
}
out:
mutex_unlock(&device_mutex);
return ret;
}
EXPORT_SYMBOL(ib_register_device);
/**
* ib_unregister_device - Unregister an IB device
* @device:Device to unregister
*
* Unregister an IB device. All clients will receive a remove callback.
*/
void ib_unregister_device(struct ib_device *device)
{
struct ib_client *client;
struct ib_client_data *context, *tmp;
unsigned long flags;
mutex_lock(&device_mutex);
list_for_each_entry_reverse(client, &client_list, list)
if (client->remove)
client->remove(device);
list_del(&device->core_list);
- kfree(device->gid_tbl_len);
- kfree(device->pkey_tbl_len);
-
mutex_unlock(&device_mutex);
ib_device_unregister_sysfs(device);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
kfree(context);
spin_unlock_irqrestore(&device->client_data_lock, flags);
device->reg_state = IB_DEV_UNREGISTERED;
}
EXPORT_SYMBOL(ib_unregister_device);
/**
* ib_register_client - Register an IB client
* @client:Client to register
*
* Upper level users of the IB drivers can use ib_register_client() to
* register callbacks for IB device addition and removal. When an IB
* device is added, each registered client's add method will be called
* (in the order the clients were registered), and when a device is
* removed, each client's remove method will be called (in the reverse
* order that clients were registered). In addition, when
* ib_register_client() is called, the client will receive an add
* callback for all devices already registered.
*/
int ib_register_client(struct ib_client *client)
{
struct ib_device *device;
mutex_lock(&device_mutex);
list_add_tail(&client->list, &client_list);
list_for_each_entry(device, &device_list, core_list)
if (client->add && !add_client_context(device, client))
client->add(device);
mutex_unlock(&device_mutex);
return 0;
}
EXPORT_SYMBOL(ib_register_client);
/**
* ib_unregister_client - Unregister an IB client
* @client:Client to unregister
*
* Upper level users use ib_unregister_client() to remove their client
* registration. When ib_unregister_client() is called, the client
* will receive a remove callback for each IB device still registered.
*/
void ib_unregister_client(struct ib_client *client)
{
struct ib_client_data *context, *tmp;
struct ib_device *device;
unsigned long flags;
mutex_lock(&device_mutex);
list_for_each_entry(device, &device_list, core_list) {
if (client->remove)
client->remove(device);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
if (context->client == client) {
list_del(&context->list);
kfree(context);
}
spin_unlock_irqrestore(&device->client_data_lock, flags);
}
list_del(&client->list);
mutex_unlock(&device_mutex);
}
EXPORT_SYMBOL(ib_unregister_client);
/**
* ib_get_client_data - Get IB client context
* @device:Device to get context for
* @client:Client to get context for
*
* ib_get_client_data() returns client context set with
* ib_set_client_data().
*/
void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
{
struct ib_client_data *context;
void *ret = NULL;
unsigned long flags;
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) {
ret = context->data;
break;
}
spin_unlock_irqrestore(&device->client_data_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_client_data);
/**
* ib_set_client_data - Set IB client context
* @device:Device to set context for
* @client:Client to set context for
* @data:Context to set
*
* ib_set_client_data() sets client context that can be retrieved with
* ib_get_client_data().
*/
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data)
{
struct ib_client_data *context;
unsigned long flags;
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) {
context->data = data;
goto out;
}
printk(KERN_WARNING "No client context found for %s/%s\n",
device->name, client->name);
out:
spin_unlock_irqrestore(&device->client_data_lock, flags);
}
EXPORT_SYMBOL(ib_set_client_data);
/**
* ib_register_event_handler - Register an IB event handler
* @event_handler:Handler to register
*
* ib_register_event_handler() registers an event handler that will be
* called back when asynchronous IB events occur (as defined in
* chapter 11 of the InfiniBand Architecture Specification). This
* callback may occur in interrupt context.
*/
int ib_register_event_handler (struct ib_event_handler *event_handler)
{
unsigned long flags;
spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
list_add_tail(&event_handler->list,
&event_handler->device->event_handler_list);
spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
return 0;
}
EXPORT_SYMBOL(ib_register_event_handler);
/**
* ib_unregister_event_handler - Unregister an event handler
* @event_handler:Handler to unregister
*
* Unregister an event handler registered with
* ib_register_event_handler().
*/
int ib_unregister_event_handler(struct ib_event_handler *event_handler)
{
unsigned long flags;
spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
list_del(&event_handler->list);
spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
return 0;
}
EXPORT_SYMBOL(ib_unregister_event_handler);
/**
* ib_dispatch_event - Dispatch an asynchronous event
* @event:Event to dispatch
*
* Low-level drivers must call ib_dispatch_event() to dispatch the
* event to all registered event handlers when an asynchronous event
* occurs.
*/
void ib_dispatch_event(struct ib_event *event)
{
unsigned long flags;
struct ib_event_handler *handler;
spin_lock_irqsave(&event->device->event_handler_lock, flags);
list_for_each_entry(handler, &event->device->event_handler_list, list)
handler->handler(handler, event);
spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
}
EXPORT_SYMBOL(ib_dispatch_event);
/**
* ib_query_device - Query IB device attributes
* @device:Device to query
* @device_attr:Device attributes
*
* ib_query_device() returns the attributes of a device through the
* @device_attr pointer.
*/
int ib_query_device(struct ib_device *device,
struct ib_device_attr *device_attr)
{
return device->query_device(device, device_attr);
}
EXPORT_SYMBOL(ib_query_device);
/**
* ib_query_port - Query IB port attributes
* @device:Device to query
* @port_num:Port number to query
* @port_attr:Port attributes
*
* ib_query_port() returns the attributes of a port through the
* @port_attr pointer.
*/
int ib_query_port(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr)
{
- if (port_num < start_port(device) || port_num > end_port(device))
+ if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
return -EINVAL;
return device->query_port(device, port_num, port_attr);
}
EXPORT_SYMBOL(ib_query_port);
/**
* ib_query_gid - Get GID table entry
* @device:Device to query
* @port_num:Port number to query
* @index:GID table index to query
* @gid:Returned GID
*
* ib_query_gid() fetches the specified GID table entry.
*/
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid)
{
return device->query_gid(device, port_num, index, gid);
}
EXPORT_SYMBOL(ib_query_gid);
/**
* ib_query_pkey - Get P_Key table entry
* @device:Device to query
* @port_num:Port number to query
* @index:P_Key table index to query
* @pkey:Returned P_Key
*
* ib_query_pkey() fetches the specified P_Key table entry.
*/
int ib_query_pkey(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey)
{
return device->query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);
/**
* ib_modify_device - Change IB device attributes
* @device:Device to modify
* @device_modify_mask:Mask of attributes to change
* @device_modify:New attribute values
*
* ib_modify_device() changes a device's attributes as specified by
* the @device_modify_mask and @device_modify structure.
*/
int ib_modify_device(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify)
{
if (!device->modify_device)
return -ENOSYS;
return device->modify_device(device, device_modify_mask,
device_modify);
}
EXPORT_SYMBOL(ib_modify_device);
/**
* ib_modify_port - Modifies the attributes for the specified port.
* @device: The device to modify.
* @port_num: The number of the port to modify.
* @port_modify_mask: Mask used to specify which attributes of the port
* to change.
* @port_modify: New attribute values for the port.
*
* ib_modify_port() changes a port's attributes as specified by the
* @port_modify_mask and @port_modify structure.
*/
int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
if (!device->modify_port)
return -ENOSYS;
- if (port_num < start_port(device) || port_num > end_port(device))
+ if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
return -EINVAL;
return device->modify_port(device, port_num, port_modify_mask,
port_modify);
}
EXPORT_SYMBOL(ib_modify_port);
/**
* ib_find_gid - Returns the port number and GID table index where
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
- for (port = start_port(device); port <= end_port(device); ++port) {
- for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) {
+ for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
+ for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
ret = ib_query_gid(device, port, i, &tmp_gid);
if (ret)
return ret;
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
*port_num = port;
if (index)
*index = i;
return 0;
}
}
}
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_gid);
/**
* ib_find_pkey - Returns the PKey table index where a specified
* PKey value occurs.
* @device: The device to query.
* @port_num: The port number of the device to search for the PKey.
* @pkey: The PKey value to search for.
* @index: The index into the PKey table where the PKey was found.
*/
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index)
{
int ret, i;
u16 tmp_pkey;
int partial_ix = -1;
- for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
+ for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) {
ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
if (ret)
return ret;
if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
/* if there is full-member pkey take it.*/
if (tmp_pkey & 0x8000) {
*index = i;
return 0;
}
if (partial_ix < 0)
partial_ix = i;
}
}
/*no full-member, if exists take the limited*/
if (partial_ix >= 0) {
*index = partial_ix;
return 0;
}
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);
static int __init ib_core_init(void)
{
int ret;
ib_wq = create_workqueue("infiniband");
if (!ib_wq)
return -ENOMEM;
ret = ib_sysfs_setup();
if (ret) {
printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
goto err;
}
ret = ib_cache_setup();
if (ret) {
printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
goto err_sysfs;
}
return 0;
err_sysfs:
ib_sysfs_cleanup();
err:
destroy_workqueue(ib_wq);
return ret;
}
static void __exit ib_core_cleanup(void)
{
ib_cache_cleanup();
ib_sysfs_cleanup();
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
}
module_init(ib_core_init);
module_exit(ib_core_cleanup);
static int
ibcore_evhand(module_t mod, int event, void *arg)
{
return (0);
}
static moduledata_t ibcore_mod = {
.name = "ibcore",
.evhand = ibcore_evhand,
};
MODULE_VERSION(ibcore, 1);
MODULE_DEPEND(ibcore, linuxkpi, 1, 1, 1);
DECLARE_MODULE(ibcore, ibcore_mod, SI_SUB_LAST, SI_ORDER_ANY);
diff --git a/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c b/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
index adc24aae6e9b..264db51e7758 100644
--- a/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1,1428 +1,1448 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_user.h"
#include "mthca_memfree.h"
static void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
mad->class_version = 1;
mad->method = IB_MGMT_METHOD_GET;
}
static int mthca_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
struct mthca_dev *mdev = to_mdev(ibdev);
u8 status;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
memset(props, 0, sizeof *props);
props->fw_ver = mdev->fw_ver;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mthca_MAD_IFC(mdev, 1, 1,
1, NULL, NULL, in_mad, out_mad,
&status);
if (err)
goto out;
if (status) {
err = -EINVAL;
goto out;
}
props->device_cap_flags = mdev->device_cap_flags;
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
props->max_mr_size = ~0ull;
props->page_size_cap = mdev->limits.page_size_cap;
props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps;
props->max_qp_wr = mdev->limits.max_wqes;
props->max_sge = mdev->limits.max_sg;
props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
props->max_cqe = mdev->limits.max_cqes;
props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds;
props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift;
props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
props->max_srq_wr = mdev->limits.max_srq_wqes;
props->max_srq_sge = mdev->limits.max_srq_sge;
props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay;
props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = mdev->limits.pkey_table_len;
props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms;
props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
/*
* If Sinai memory key optimization is being used, then only
* the 8-bit key portion will change. For other HCAs, the
* unused index bits will also be used for FMR remapping.
*/
if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
props->max_map_per_fmr = 255;
else
props->max_map_per_fmr =
(1 << (32 - ilog2(mdev->limits.num_mpts))) - 1;
err = 0;
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static int mthca_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
u8 status;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
memset(props, 0, sizeof *props);
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
port, NULL, NULL, in_mad, out_mad,
&status);
if (err)
goto out;
if (status) {
err = -EINVAL;
goto out;
}
props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
props->lmc = out_mad->data[34] & 0x7;
props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
props->sm_sl = out_mad->data[36] & 0xf;
props->state = out_mad->data[32] & 0xf;
props->phys_state = out_mad->data[33] >> 4;
props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len;
props->max_msg_sz = 0x80000000;
props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len;
props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
props->active_width = out_mad->data[31] & 0xf;
props->active_speed = out_mad->data[35] >> 4;
props->max_mtu = out_mad->data[41] & 0xf;
props->active_mtu = out_mad->data[36] >> 4;
props->subnet_timeout = out_mad->data[51] & 0x1f;
props->max_vl_num = out_mad->data[37] >> 4;
props->init_type_reply = out_mad->data[41] >> 4;
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static int mthca_modify_device(struct ib_device *ibdev,
int mask,
struct ib_device_modify *props)
{
if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
return -EOPNOTSUPP;
if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
return -ERESTARTSYS;
memcpy(ibdev->node_desc, props->node_desc, 64);
mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
}
return 0;
}
static int mthca_modify_port(struct ib_device *ibdev,
u8 port, int port_modify_mask,
struct ib_port_modify *props)
{
struct mthca_set_ib_param set_ib;
struct ib_port_attr attr;
int err;
u8 status;
if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
return -ERESTARTSYS;
err = mthca_query_port(ibdev, port, &attr);
if (err)
goto out;
set_ib.set_si_guid = 0;
set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);
set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
~props->clr_port_cap_mask;
err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
if (err)
goto out;
if (status) {
err = -EINVAL;
goto out;
}
out:
mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
return err;
}
static int mthca_query_pkey(struct ib_device *ibdev,
u8 port, u16 index, u16 *pkey)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
u8 status;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
port, NULL, NULL, in_mad, out_mad,
&status);
if (err)
goto out;
if (status) {
err = -EINVAL;
goto out;
}
*pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static int mthca_query_gid(struct ib_device *ibdev, u8 port,
int index, union ib_gid *gid)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
u8 status;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
port, NULL, NULL, in_mad, out_mad,
&status);
if (err)
goto out;
if (status) {
err = -EINVAL;
goto out;
}
memcpy(gid->raw, out_mad->data + 8, 8);
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
port, NULL, NULL, in_mad, out_mad,
&status);
if (err)
goto out;
if (status) {
err = -EINVAL;
goto out;
}
memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mthca_alloc_ucontext_resp uresp;
struct mthca_ucontext *context;
int err;
if (!(to_mdev(ibdev)->active))
return ERR_PTR(-EAGAIN);
memset(&uresp, 0, sizeof uresp);
uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
if (mthca_is_memfree(to_mdev(ibdev)))
uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
else
uresp.uarc_size = 0;
context = kmalloc(sizeof *context, GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
if (err) {
kfree(context);
return ERR_PTR(err);
}
context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
if (IS_ERR(context->db_tab)) {
err = PTR_ERR(context->db_tab);
mthca_uar_free(to_mdev(ibdev), &context->uar);
kfree(context);
return ERR_PTR(err);
}
if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
mthca_uar_free(to_mdev(ibdev), &context->uar);
kfree(context);
return ERR_PTR(-EFAULT);
}
context->reg_mr_warned = 0;
return &context->ibucontext;
}
static int mthca_dealloc_ucontext(struct ib_ucontext *context)
{
mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
to_mucontext(context)->db_tab);
mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
kfree(to_mucontext(context));
return 0;
}
static int mthca_mmap_uar(struct ib_ucontext *context,
struct vm_area_struct *vma)
{
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
return 0;
}
static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mthca_pd *pd;
int err;
pd = kmalloc(sizeof *pd, GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
if (err) {
kfree(pd);
return ERR_PTR(err);
}
if (context) {
if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
mthca_pd_free(to_mdev(ibdev), pd);
kfree(pd);
return ERR_PTR(-EFAULT);
}
}
return &pd->ibpd;
}
static int mthca_dealloc_pd(struct ib_pd *pd)
{
mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
kfree(pd);
return 0;
}
static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
struct ib_ah_attr *ah_attr)
{
int err;
struct mthca_ah *ah;
ah = kmalloc(sizeof *ah, GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
if (err) {
kfree(ah);
return ERR_PTR(err);
}
return &ah->ibah;
}
static int mthca_ah_destroy(struct ib_ah *ah)
{
mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
kfree(ah);
return 0;
}
static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata)
{
struct mthca_create_srq ucmd;
struct mthca_ucontext *context = NULL;
struct mthca_srq *srq;
int err;
srq = kmalloc(sizeof *srq, GFP_KERNEL);
if (!srq)
return ERR_PTR(-ENOMEM);
if (pd->uobject) {
context = to_mucontext(pd->uobject->context);
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
err = -EFAULT;
goto err_free;
}
err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
context->db_tab, ucmd.db_index,
ucmd.db_page);
if (err)
goto err_free;
srq->mr.ibmr.lkey = ucmd.lkey;
srq->db_index = ucmd.db_index;
}
err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
&init_attr->attr, srq);
if (err && pd->uobject)
mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
context->db_tab, ucmd.db_index);
if (err)
goto err_free;
if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) {
mthca_free_srq(to_mdev(pd->device), srq);
err = -EFAULT;
goto err_free;
}
return &srq->ibsrq;
err_free:
kfree(srq);
return ERR_PTR(err);
}
static int mthca_destroy_srq(struct ib_srq *srq)
{
struct mthca_ucontext *context;
if (srq->uobject) {
context = to_mucontext(srq->uobject->context);
mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
context->db_tab, to_msrq(srq)->db_index);
}
mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
kfree(srq);
return 0;
}
static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
struct mthca_create_qp ucmd;
struct mthca_qp *qp;
int err;
if (init_attr->create_flags)
return ERR_PTR(-EINVAL);
switch (init_attr->qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
{
struct mthca_ucontext *context;
qp = kmalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
if (pd->uobject) {
context = to_mucontext(pd->uobject->context);
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
kfree(qp);
return ERR_PTR(-EFAULT);
}
err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
context->db_tab,
ucmd.sq_db_index, ucmd.sq_db_page);
if (err) {
kfree(qp);
return ERR_PTR(err);
}
err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
context->db_tab,
ucmd.rq_db_index, ucmd.rq_db_page);
if (err) {
mthca_unmap_user_db(to_mdev(pd->device),
&context->uar,
context->db_tab,
ucmd.sq_db_index);
kfree(qp);
return ERR_PTR(err);
}
qp->mr.ibmr.lkey = ucmd.lkey;
qp->sq.db_index = ucmd.sq_db_index;
qp->rq.db_index = ucmd.rq_db_index;
}
err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
init_attr->qp_type, init_attr->sq_sig_type,
&init_attr->cap, qp);
if (err && pd->uobject) {
context = to_mucontext(pd->uobject->context);
mthca_unmap_user_db(to_mdev(pd->device),
&context->uar,
context->db_tab,
ucmd.sq_db_index);
mthca_unmap_user_db(to_mdev(pd->device),
&context->uar,
context->db_tab,
ucmd.rq_db_index);
}
qp->ibqp.qp_num = qp->qpn;
break;
}
case IB_QPT_SMI:
case IB_QPT_GSI:
{
/* Don't allow userspace to create special QPs */
if (pd->uobject)
return ERR_PTR(-EINVAL);
qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
init_attr->sq_sig_type, &init_attr->cap,
qp->ibqp.qp_num, init_attr->port_num,
to_msqp(qp));
break;
}
default:
/* Don't support raw QPs */
return ERR_PTR(-ENOSYS);
}
if (err) {
kfree(qp);
return ERR_PTR(err);
}
init_attr->cap.max_send_wr = qp->sq.max;
init_attr->cap.max_recv_wr = qp->rq.max;
init_attr->cap.max_send_sge = qp->sq.max_gs;
init_attr->cap.max_recv_sge = qp->rq.max_gs;
init_attr->cap.max_inline_data = qp->max_inline_data;
return &qp->ibqp;
}
static int mthca_destroy_qp(struct ib_qp *qp)
{
if (qp->uobject) {
mthca_unmap_user_db(to_mdev(qp->device),
&to_mucontext(qp->uobject->context)->uar,
to_mucontext(qp->uobject->context)->db_tab,
to_mqp(qp)->sq.db_index);
mthca_unmap_user_db(to_mdev(qp->device),
&to_mucontext(qp->uobject->context)->uar,
to_mucontext(qp->uobject->context)->db_tab,
to_mqp(qp)->rq.db_index);
}
mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
kfree(qp);
return 0;
}
static struct ib_cq *mthca_create_cq(struct ib_device *ibdev,
struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mthca_create_cq ucmd;
struct mthca_cq *cq;
int nent;
int err;
int entries = attr->cqe;
if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
return ERR_PTR(-EINVAL);
if (context) {
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
return ERR_PTR(-EFAULT);
err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
to_mucontext(context)->db_tab,
ucmd.set_db_index, ucmd.set_db_page);
if (err)
return ERR_PTR(err);
err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
to_mucontext(context)->db_tab,
ucmd.arm_db_index, ucmd.arm_db_page);
if (err)
goto err_unmap_set;
}
cq = kmalloc(sizeof *cq, GFP_KERNEL);
if (!cq) {
err = -ENOMEM;
goto err_unmap_arm;
}
if (context) {
cq->buf.mr.ibmr.lkey = ucmd.lkey;
cq->set_ci_db_index = ucmd.set_db_index;
cq->arm_db_index = ucmd.arm_db_index;
}
for (nent = 1; nent <= entries; nent <<= 1)
; /* nothing */
err = mthca_init_cq(to_mdev(ibdev), nent,
context ? to_mucontext(context) : NULL,
context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
cq);
if (err)
goto err_free;
if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
mthca_free_cq(to_mdev(ibdev), cq);
goto err_free;
}
cq->resize_buf = NULL;
return &cq->ibcq;
err_free:
kfree(cq);
err_unmap_arm:
if (context)
mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
to_mucontext(context)->db_tab, ucmd.arm_db_index);
err_unmap_set:
if (context)
mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
to_mucontext(context)->db_tab, ucmd.set_db_index);
return ERR_PTR(err);
}
static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
int entries)
{
int ret;
spin_lock_irq(&cq->lock);
if (cq->resize_buf) {
ret = -EBUSY;
goto unlock;
}
cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
if (!cq->resize_buf) {
ret = -ENOMEM;
goto unlock;
}
cq->resize_buf->state = CQ_RESIZE_ALLOC;
ret = 0;
unlock:
spin_unlock_irq(&cq->lock);
if (ret)
return ret;
ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
if (ret) {
spin_lock_irq(&cq->lock);
kfree(cq->resize_buf);
cq->resize_buf = NULL;
spin_unlock_irq(&cq->lock);
return ret;
}
cq->resize_buf->cqe = entries - 1;
spin_lock_irq(&cq->lock);
cq->resize_buf->state = CQ_RESIZE_READY;
spin_unlock_irq(&cq->lock);
return 0;
}
static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(ibcq->device);
struct mthca_cq *cq = to_mcq(ibcq);
struct mthca_resize_cq ucmd;
u32 lkey;
u8 status;
int ret;
if (entries < 1 || entries > dev->limits.max_cqes)
return -EINVAL;
mutex_lock(&cq->mutex);
entries = roundup_pow_of_two(entries + 1);
if (entries == ibcq->cqe + 1) {
ret = 0;
goto out;
}
if (cq->is_kernel) {
ret = mthca_alloc_resize_buf(dev, cq, entries);
if (ret)
goto out;
lkey = cq->resize_buf->buf.mr.ibmr.lkey;
} else {
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
ret = -EFAULT;
goto out;
}
lkey = ucmd.lkey;
}
ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries), &status);
if (status)
ret = -EINVAL;
if (ret) {
if (cq->resize_buf) {
mthca_free_cq_buf(dev, &cq->resize_buf->buf,
cq->resize_buf->cqe);
kfree(cq->resize_buf);
spin_lock_irq(&cq->lock);
cq->resize_buf = NULL;
spin_unlock_irq(&cq->lock);
}
goto out;
}
if (cq->is_kernel) {
struct mthca_cq_buf tbuf;
int tcqe;
spin_lock_irq(&cq->lock);
if (cq->resize_buf->state == CQ_RESIZE_READY) {
mthca_cq_resize_copy_cqes(cq);
tbuf = cq->buf;
tcqe = cq->ibcq.cqe;
cq->buf = cq->resize_buf->buf;
cq->ibcq.cqe = cq->resize_buf->cqe;
} else {
tbuf = cq->resize_buf->buf;
tcqe = cq->resize_buf->cqe;
}
kfree(cq->resize_buf);
cq->resize_buf = NULL;
spin_unlock_irq(&cq->lock);
mthca_free_cq_buf(dev, &tbuf, tcqe);
} else
ibcq->cqe = entries - 1;
out:
mutex_unlock(&cq->mutex);
return ret;
}
static int mthca_destroy_cq(struct ib_cq *cq)
{
if (cq->uobject) {
mthca_unmap_user_db(to_mdev(cq->device),
&to_mucontext(cq->uobject->context)->uar,
to_mucontext(cq->uobject->context)->db_tab,
to_mcq(cq)->arm_db_index);
mthca_unmap_user_db(to_mdev(cq->device),
&to_mucontext(cq->uobject->context)->uar,
to_mucontext(cq->uobject->context)->db_tab,
to_mcq(cq)->set_ci_db_index);
}
mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
kfree(cq);
return 0;
}
static inline u32 convert_access(int acc)
{
return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC : 0) |
(acc & IB_ACCESS_REMOTE_WRITE ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
(acc & IB_ACCESS_REMOTE_READ ? MTHCA_MPT_FLAG_REMOTE_READ : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0) |
MTHCA_MPT_FLAG_LOCAL_READ;
}
static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mthca_mr *mr;
int err;
mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
err = mthca_mr_alloc_notrans(to_mdev(pd->device),
to_mpd(pd)->pd_num,
convert_access(acc), mr);
if (err) {
kfree(mr);
return ERR_PTR(err);
}
mr->umem = NULL;
return &mr->ibmr;
}
static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *buffer_list,
int num_phys_buf,
int acc,
u64 *iova_start)
{
struct mthca_mr *mr;
u64 *page_list;
u64 total_size;
unsigned long mask;
int shift;
int npages;
int err;
int i, j, n;
mask = buffer_list[0].addr ^ *iova_start;
total_size = 0;
for (i = 0; i < num_phys_buf; ++i) {
if (i != 0)
mask |= buffer_list[i].addr;
if (i != num_phys_buf - 1)
mask |= buffer_list[i].addr + buffer_list[i].size;
total_size += buffer_list[i].size;
}
if (mask & ~PAGE_MASK)
return ERR_PTR(-EINVAL);
shift = __ffs(mask | 1 << 31);
buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
buffer_list[0].addr &= ~0ull << shift;
mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
npages = 0;
for (i = 0; i < num_phys_buf; ++i)
npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
if (!npages)
return &mr->ibmr;
page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
if (!page_list) {
kfree(mr);
return ERR_PTR(-ENOMEM);
}
n = 0;
for (i = 0; i < num_phys_buf; ++i)
for (j = 0;
j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
++j)
page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
"in PD %x; shift %d, npages %d.\n",
(unsigned long long) buffer_list[0].addr,
(unsigned long long) *iova_start,
to_mpd(pd)->pd_num,
shift, npages);
err = mthca_mr_alloc_phys(to_mdev(pd->device),
to_mpd(pd)->pd_num,
page_list, shift, npages,
*iova_start, total_size,
convert_access(acc), mr);
if (err) {
kfree(page_list);
kfree(mr);
return ERR_PTR(err);
}
kfree(page_list);
mr->umem = NULL;
return &mr->ibmr;
}
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata, int mr_id)
{
struct mthca_dev *dev = to_mdev(pd->device);
struct scatterlist *sg;
struct mthca_mr *mr;
struct mthca_reg_mr ucmd;
u64 *pages;
int shift, n, len;
int i, k, entry;
int err = 0;
int write_mtt_size;
if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) {
if (!to_mucontext(pd->uobject->context)->reg_mr_warned) {
mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n",
curproc->p_comm);
mthca_warn(dev, " Update libmthca to fix this.\n");
}
++to_mucontext(pd->uobject->context)->reg_mr_warned;
ucmd.mr_attrs = 0;
} else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
return ERR_PTR(-EFAULT);
mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->umem = ib_umem_get(pd->uobject->context, start, length, acc,
ucmd.mr_attrs & MTHCA_MR_DMASYNC);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err;
}
shift = ffs(mr->umem->page_size) - 1;
n = mr->umem->nmap;
mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) {
err = PTR_ERR(mr->mtt);
goto err_umem;
}
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages) {
err = -ENOMEM;
goto err_mtt;
}
i = n = 0;
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(sg) +
mr->umem->page_size * k;
/*
* Be friendly to write_mtt and pass it chunks
* of appropriate size.
*/
if (i == write_mtt_size) {
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
if (err)
goto mtt_done;
n += i;
i = 0;
}
}
}
if (i)
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
mtt_done:
free_page((unsigned long) pages);
if (err)
goto err_mtt;
err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
convert_access(acc), mr);
if (err)
goto err_mtt;
return &mr->ibmr;
err_mtt:
mthca_free_mtt(dev, mr->mtt);
err_umem:
ib_umem_release(mr->umem);
err:
kfree(mr);
return ERR_PTR(err);
}
static int mthca_dereg_mr(struct ib_mr *mr)
{
struct mthca_mr *mmr = to_mmr(mr);
mthca_free_mr(to_mdev(mr->device), mmr);
if (mmr->umem)
ib_umem_release(mmr->umem);
kfree(mmr);
return 0;
}
static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr)
{
struct mthca_fmr *fmr;
int err;
fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
if (!fmr)
return ERR_PTR(-ENOMEM);
memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
convert_access(mr_access_flags), fmr);
if (err) {
kfree(fmr);
return ERR_PTR(err);
}
return &fmr->ibmr;
}
static int mthca_dealloc_fmr(struct ib_fmr *fmr)
{
struct mthca_fmr *mfmr = to_mfmr(fmr);
int err;
err = mthca_free_fmr(to_mdev(fmr->device), mfmr);
if (err)
return err;
kfree(mfmr);
return 0;
}
static int mthca_unmap_fmr(struct list_head *fmr_list)
{
struct ib_fmr *fmr;
int err;
u8 status;
struct mthca_dev *mdev = NULL;
list_for_each_entry(fmr, fmr_list, list) {
if (mdev && to_mdev(fmr->device) != mdev)
return -EINVAL;
mdev = to_mdev(fmr->device);
}
if (!mdev)
return 0;
if (mthca_is_memfree(mdev)) {
list_for_each_entry(fmr, fmr_list, list)
mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));
wmb();
} else
list_for_each_entry(fmr, fmr_list, list)
mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));
err = mthca_SYNC_TPT(mdev, &status);
if (err)
return err;
if (status)
return -EINVAL;
return 0;
}
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->rev_id);
}
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev.dev);
return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32),
(int) (dev->fw_ver >> 16) & 0xffff,
(int) dev->fw_ver & 0xffff);
}
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev.dev);
switch (dev->pdev->device) {
case PCI_DEVICE_ID_MELLANOX_TAVOR:
return sprintf(buf, "MT23108\n");
case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
case PCI_DEVICE_ID_MELLANOX_ARBEL:
return sprintf(buf, "MT25208\n");
case PCI_DEVICE_ID_MELLANOX_SINAI:
case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
return sprintf(buf, "MT25204\n");
default:
return sprintf(buf, "unknown\n");
}
}
static ssize_t show_board(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev.dev);
return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *mthca_dev_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
static int mthca_init_node_data(struct mthca_dev *dev)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
u8 status;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
err = mthca_MAD_IFC(dev, 1, 1,
1, NULL, NULL, in_mad, out_mad,
&status);
if (err)
goto out;
if (status) {
err = -EINVAL;
goto out;
}
memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mthca_MAD_IFC(dev, 1, 1,
1, NULL, NULL, in_mad, out_mad,
&status);
if (err)
goto out;
if (status) {
err = -EINVAL;
goto out;
}
if (mthca_is_memfree(dev))
dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
out:
kfree(in_mad);
kfree(out_mad);
return err;
}
+static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+
+ return 0;
+}
+
int mthca_register_device(struct mthca_dev *dev)
{
int ret;
int i;
ret = mthca_init_node_data(dev);
if (ret)
return ret;
strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.num_comp_vectors = 1;
dev->ib_dev.dma_device = &dev->pdev->dev;
dev->ib_dev.query_device = mthca_query_device;
dev->ib_dev.query_port = mthca_query_port;
dev->ib_dev.modify_device = mthca_modify_device;
dev->ib_dev.modify_port = mthca_modify_port;
dev->ib_dev.query_pkey = mthca_query_pkey;
dev->ib_dev.query_gid = mthca_query_gid;
dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext;
dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext;
dev->ib_dev.mmap = mthca_mmap_uar;
dev->ib_dev.alloc_pd = mthca_alloc_pd;
dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
dev->ib_dev.create_ah = mthca_ah_create;
dev->ib_dev.query_ah = mthca_ah_query;
dev->ib_dev.destroy_ah = mthca_ah_destroy;
if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
dev->ib_dev.create_srq = mthca_create_srq;
dev->ib_dev.modify_srq = mthca_modify_srq;
dev->ib_dev.query_srq = mthca_query_srq;
dev->ib_dev.destroy_srq = mthca_destroy_srq;
dev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
if (mthca_is_memfree(dev))
dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
else
dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
}
dev->ib_dev.create_qp = mthca_create_qp;
dev->ib_dev.modify_qp = mthca_modify_qp;
dev->ib_dev.query_qp = mthca_query_qp;
dev->ib_dev.destroy_qp = mthca_destroy_qp;
dev->ib_dev.create_cq = mthca_create_cq;
dev->ib_dev.resize_cq = mthca_resize_cq;
dev->ib_dev.destroy_cq = mthca_destroy_cq;
dev->ib_dev.poll_cq = mthca_poll_cq;
dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;
+ dev->ib_dev.get_port_immutable = mthca_port_immutable;
if (dev->mthca_flags & MTHCA_FLAG_FMR) {
dev->ib_dev.alloc_fmr = mthca_alloc_fmr;
dev->ib_dev.unmap_fmr = mthca_unmap_fmr;
dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr;
if (mthca_is_memfree(dev))
dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
else
dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
}
dev->ib_dev.attach_mcast = mthca_multicast_attach;
dev->ib_dev.detach_mcast = mthca_multicast_detach;
dev->ib_dev.process_mad = mthca_process_mad;
if (mthca_is_memfree(dev)) {
dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
dev->ib_dev.post_send = mthca_arbel_post_send;
dev->ib_dev.post_recv = mthca_arbel_post_receive;
} else {
dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
dev->ib_dev.post_send = mthca_tavor_post_send;
dev->ib_dev.post_recv = mthca_tavor_post_receive;
}
mutex_init(&dev->cap_mask_mutex);
ret = ib_register_device(&dev->ib_dev, NULL);
if (ret)
return ret;
for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) {
ret = device_create_file(&dev->ib_dev.dev,
mthca_dev_attributes[i]);
if (ret) {
ib_unregister_device(&dev->ib_dev);
return ret;
}
}
mthca_start_catas_poll(dev);
return 0;
}
void mthca_unregister_device(struct mthca_dev *dev)
{
mthca_stop_catas_poll(dev);
ib_unregister_device(&dev->ib_dev);
}
diff --git a/sys/ofed/include/rdma/ib_mad.h b/sys/ofed/include/rdma/ib_mad.h
index 3d81b90cc315..1a7fb1ae564c 100644
--- a/sys/ofed/include/rdma/ib_mad.h
+++ b/sys/ofed/include/rdma/ib_mad.h
@@ -1,664 +1,665 @@
/*
* Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004-2006 Voltaire Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if !defined(IB_MAD_H)
#define IB_MAD_H
#include <linux/list.h>
#include <rdma/ib_verbs.h>
/* Management base version */
#define IB_MGMT_BASE_VERSION 1
/* Management classes */
#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01
#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81
#define IB_MGMT_CLASS_SUBN_ADM 0x03
#define IB_MGMT_CLASS_PERF_MGMT 0x04
#define IB_MGMT_CLASS_BM 0x05
#define IB_MGMT_CLASS_DEVICE_MGMT 0x06
#define IB_MGMT_CLASS_CM 0x07
#define IB_MGMT_CLASS_SNMP 0x08
#define IB_MGMT_CLASS_DEVICE_ADM 0x10
#define IB_MGMT_CLASS_BOOT_MGMT 0x11
#define IB_MGMT_CLASS_BIS 0x12
#define IB_MGMT_CLASS_CONG_MGMT 0x21
#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30
#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F
#define IB_OPENIB_OUI (0x001405)
/* Management methods */
#define IB_MGMT_METHOD_GET 0x01
#define IB_MGMT_METHOD_SET 0x02
#define IB_MGMT_METHOD_GET_RESP 0x81
#define IB_MGMT_METHOD_SEND 0x03
#define IB_MGMT_METHOD_TRAP 0x05
#define IB_MGMT_METHOD_REPORT 0x06
#define IB_MGMT_METHOD_REPORT_RESP 0x86
#define IB_MGMT_METHOD_TRAP_REPRESS 0x07
#define IB_MGMT_METHOD_RESP 0x80
#define IB_BM_ATTR_MOD_RESP cpu_to_be32(1)
#define IB_MGMT_MAX_METHODS 128
/* MAD Status field bit masks */
#define IB_MGMT_MAD_STATUS_SUCCESS 0x0000
#define IB_MGMT_MAD_STATUS_BUSY 0x0001
#define IB_MGMT_MAD_STATUS_REDIRECT_REQD 0x0002
#define IB_MGMT_MAD_STATUS_BAD_VERSION 0x0004
#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD 0x0008
#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB 0x000c
#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE 0x001c
/* RMPP information */
#define IB_MGMT_RMPP_VERSION 1
#define IB_MGMT_RMPP_TYPE_DATA 1
#define IB_MGMT_RMPP_TYPE_ACK 2
#define IB_MGMT_RMPP_TYPE_STOP 3
#define IB_MGMT_RMPP_TYPE_ABORT 4
#define IB_MGMT_RMPP_FLAG_ACTIVE 1
#define IB_MGMT_RMPP_FLAG_FIRST (1<<1)
#define IB_MGMT_RMPP_FLAG_LAST (1<<2)
#define IB_MGMT_RMPP_NO_RESPTIME 0x1F
#define IB_MGMT_RMPP_STATUS_SUCCESS 0
#define IB_MGMT_RMPP_STATUS_RESX 1
#define IB_MGMT_RMPP_STATUS_ABORT_MIN 118
#define IB_MGMT_RMPP_STATUS_T2L 118
#define IB_MGMT_RMPP_STATUS_BAD_LEN 119
#define IB_MGMT_RMPP_STATUS_BAD_SEG 120
#define IB_MGMT_RMPP_STATUS_BADT 121
#define IB_MGMT_RMPP_STATUS_W2S 122
#define IB_MGMT_RMPP_STATUS_S2B 123
#define IB_MGMT_RMPP_STATUS_BAD_STATUS 124
#define IB_MGMT_RMPP_STATUS_UNV 125
#define IB_MGMT_RMPP_STATUS_TMR 126
#define IB_MGMT_RMPP_STATUS_UNSPEC 127
#define IB_MGMT_RMPP_STATUS_ABORT_MAX 127
#define IB_QP0 0
#define IB_QP1 cpu_to_be32(1)
#define IB_QP1_QKEY 0x80010000
#define IB_QP_SET_QKEY 0x80000000
#define IB_DEFAULT_PKEY_PARTIAL 0x7FFF
#define IB_DEFAULT_PKEY_FULL 0xFFFF
enum {
IB_MGMT_MAD_HDR = 24,
IB_MGMT_MAD_DATA = 232,
IB_MGMT_RMPP_HDR = 36,
IB_MGMT_RMPP_DATA = 220,
IB_MGMT_VENDOR_HDR = 40,
IB_MGMT_VENDOR_DATA = 216,
IB_MGMT_SA_HDR = 56,
IB_MGMT_SA_DATA = 200,
IB_MGMT_DEVICE_HDR = 64,
IB_MGMT_DEVICE_DATA = 192,
+ IB_MGMT_MAD_SIZE = IB_MGMT_MAD_HDR + IB_MGMT_MAD_DATA,
};
struct ib_mad_hdr {
u8 base_version;
u8 mgmt_class;
u8 class_version;
u8 method;
__be16 status;
__be16 class_specific;
__be64 tid;
__be16 attr_id;
__be16 resv;
__be32 attr_mod;
};
struct ib_rmpp_hdr {
u8 rmpp_version;
u8 rmpp_type;
u8 rmpp_rtime_flags;
u8 rmpp_status;
__be32 seg_num;
__be32 paylen_newwin;
};
typedef u64 __bitwise ib_sa_comp_mask;
#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << (n)))
/*
* ib_sa_hdr and ib_sa_mad structures must be packed because they have
* 64-bit fields that are only 32-bit aligned. 64-bit architectures will
* lay them out wrong otherwise. (And unfortunately they are sent on
* the wire so we can't change the layout)
*/
struct ib_sa_hdr {
__be64 sm_key;
__be16 attr_offset;
__be16 reserved;
ib_sa_comp_mask comp_mask;
} __attribute__ ((packed));
struct ib_mad {
struct ib_mad_hdr mad_hdr;
u8 data[IB_MGMT_MAD_DATA];
};
struct ib_rmpp_mad {
struct ib_mad_hdr mad_hdr;
struct ib_rmpp_hdr rmpp_hdr;
u8 data[IB_MGMT_RMPP_DATA];
};
struct ib_sa_mad {
struct ib_mad_hdr mad_hdr;
struct ib_rmpp_hdr rmpp_hdr;
struct ib_sa_hdr sa_hdr;
u8 data[IB_MGMT_SA_DATA];
} __attribute__ ((packed));
struct ib_vendor_mad {
struct ib_mad_hdr mad_hdr;
struct ib_rmpp_hdr rmpp_hdr;
u8 reserved;
u8 oui[3];
u8 data[IB_MGMT_VENDOR_DATA];
};
struct ib_class_port_info {
u8 base_version;
u8 class_version;
__be16 capability_mask;
u8 reserved[3];
u8 resp_time_value;
u8 redirect_gid[16];
__be32 redirect_tcslfl;
__be16 redirect_lid;
__be16 redirect_pkey;
__be32 redirect_qp;
__be32 redirect_qkey;
u8 trap_gid[16];
__be32 trap_tcslfl;
__be16 trap_lid;
__be16 trap_pkey;
__be32 trap_hlqp;
__be32 trap_qkey;
};
/**
* ib_mad_send_buf - MAD data buffer and work request for sends.
* @next: A pointer used to chain together MADs for posting.
* @mad: References an allocated MAD data buffer for MADs that do not have
* RMPP active. For MADs using RMPP, references the common and management
* class specific headers.
* @mad_agent: MAD agent that allocated the buffer.
* @ah: The address handle to use when sending the MAD.
* @context: User-controlled context fields.
* @hdr_len: Indicates the size of the data header of the MAD. This length
* includes the common MAD, RMPP, and class specific headers.
* @data_len: Indicates the total size of user-transferred data.
* @seg_count: The number of RMPP segments allocated for this send.
* @seg_size: Size of each RMPP segment.
* @timeout_ms: Time to wait for a response.
* @retries: Number of times to retry a request for a response. For MADs
* using RMPP, this applies per window. On completion, returns the number
* of retries needed to complete the transfer.
*
* Users are responsible for initializing the MAD buffer itself, with the
* exception of any RMPP header. Additional segment buffer space allocated
* beyond data_len is padding.
*/
struct ib_mad_send_buf {
struct ib_mad_send_buf *next;
void *mad;
struct ib_mad_agent *mad_agent;
struct ib_ah *ah;
void *context[2];
int hdr_len;
int data_len;
int seg_count;
int seg_size;
int timeout_ms;
int retries;
};
/**
* ib_response_mad - Returns if the specified MAD has been generated in
* response to a sent request or trap.
*/
int ib_response_mad(struct ib_mad *mad);
/**
* ib_get_rmpp_resptime - Returns the RMPP response time.
* @rmpp_hdr: An RMPP header.
*/
static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr)
{
return rmpp_hdr->rmpp_rtime_flags >> 3;
}
/**
* ib_get_rmpp_flags - Returns the RMPP flags.
* @rmpp_hdr: An RMPP header.
*/
static inline u8 ib_get_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr)
{
return rmpp_hdr->rmpp_rtime_flags & 0x7;
}
/**
* ib_set_rmpp_resptime - Sets the response time in an RMPP header.
* @rmpp_hdr: An RMPP header.
* @rtime: The response time to set.
*/
static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime)
{
rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3);
}
/**
* ib_set_rmpp_flags - Sets the flags in an RMPP header.
* @rmpp_hdr: An RMPP header.
* @flags: The flags to set.
*/
static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags)
{
rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF8) |
(flags & 0x7);
}
struct ib_mad_agent;
struct ib_mad_send_wc;
struct ib_mad_recv_wc;
/**
* ib_mad_send_handler - callback handler for a sent MAD.
* @mad_agent: MAD agent that sent the MAD.
* @mad_send_wc: Send work completion information on the sent MAD.
*/
typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc);
/**
* ib_mad_snoop_handler - Callback handler for snooping sent MADs.
* @mad_agent: MAD agent that snooped the MAD.
* @send_wr: Work request information on the sent MAD.
* @mad_send_wc: Work completion information on the sent MAD. Valid
* only for snooping that occurs on a send completion.
*
* Clients snooping MADs should not modify data referenced by the @send_wr
* or @mad_send_wc.
*/
typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
struct ib_mad_send_buf *send_buf,
struct ib_mad_send_wc *mad_send_wc);
/**
* ib_mad_recv_handler - callback handler for a received MAD.
* @mad_agent: MAD agent requesting the received MAD.
* @mad_recv_wc: Received work completion information on the received MAD.
*
* MADs received in response to a send request operation will be handed to
* the user before the send operation completes. All data buffers given
* to registered agents through this routine are owned by the receiving
* client, except for snooping agents. Clients snooping MADs should not
* modify the data referenced by @mad_recv_wc.
*/
typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
struct ib_mad_recv_wc *mad_recv_wc);
/**
* ib_mad_agent - Used to track MAD registration with the access layer.
* @device: Reference to device registration is on.
* @qp: Reference to QP used for sending and receiving MADs.
* @mr: Memory region for system memory usable for DMA.
* @recv_handler: Callback handler for a received MAD.
* @send_handler: Callback handler for a sent MAD.
* @snoop_handler: Callback handler for snooped sent MADs.
* @context: User-specified context associated with this registration.
* @hi_tid: Access layer assigned transaction ID for this client.
* Unsolicited MADs sent by this client will have the upper 32-bits
* of their TID set to this value.
* @port_num: Port number on which QP is registered
* @rmpp_version: If set, indicates the RMPP version used by this agent.
*/
struct ib_mad_agent {
struct ib_device *device;
struct ib_qp *qp;
struct ib_mr *mr;
ib_mad_recv_handler recv_handler;
ib_mad_send_handler send_handler;
ib_mad_snoop_handler snoop_handler;
void *context;
u32 hi_tid;
u8 port_num;
u8 rmpp_version;
};
/**
* ib_mad_send_wc - MAD send completion information.
* @send_buf: Send MAD data buffer associated with the send MAD request.
* @status: Completion status.
* @vendor_err: Optional vendor error information returned with a failed
* request.
*/
struct ib_mad_send_wc {
struct ib_mad_send_buf *send_buf;
enum ib_wc_status status;
u32 vendor_err;
};
/**
* ib_mad_recv_buf - received MAD buffer information.
* @list: Reference to next data buffer for a received RMPP MAD.
* @grh: References a data buffer containing the global route header.
* The data refereced by this buffer is only valid if the GRH is
* valid.
* @mad: References the start of the received MAD.
*/
struct ib_mad_recv_buf {
struct list_head list;
struct ib_grh *grh;
struct ib_mad *mad;
};
/**
* ib_mad_recv_wc - received MAD information.
* @wc: Completion information for the received data.
* @recv_buf: Specifies the location of the received data buffer(s).
* @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers.
* @mad_len: The length of the received MAD, without duplicated headers.
*
* For received response, the wr_id contains a pointer to the ib_mad_send_buf
* for the corresponding send request.
*/
struct ib_mad_recv_wc {
struct ib_wc *wc;
struct ib_mad_recv_buf recv_buf;
struct list_head rmpp_list;
int mad_len;
};
/**
* ib_mad_reg_req - MAD registration request
* @mgmt_class: Indicates which management class of MADs should be receive
* by the caller. This field is only required if the user wishes to
* receive unsolicited MADs, otherwise it should be 0.
* @mgmt_class_version: Indicates which version of MADs for the given
* management class to receive.
* @oui: Indicates IEEE OUI when mgmt_class is a vendor class
* in the range from 0x30 to 0x4f. Otherwise not used.
* @method_mask: The caller will receive unsolicited MADs for any method
* where @method_mask = 1.
*/
struct ib_mad_reg_req {
u8 mgmt_class;
u8 mgmt_class_version;
u8 oui[3];
DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS);
};
/**
* ib_register_mad_agent - Register to send/receive MADs.
* @device: The device to register with.
* @port_num: The port on the specified device to use.
* @qp_type: Specifies which QP to access. Must be either
* IB_QPT_SMI or IB_QPT_GSI.
* @mad_reg_req: Specifies which unsolicited MADs should be received
* by the caller. This parameter may be NULL if the caller only
* wishes to receive solicited responses.
* @rmpp_version: If set, indicates that the client will send
* and receive MADs that contain the RMPP header for the given version.
* If set to 0, indicates that RMPP is not used by this client.
* @send_handler: The completion callback routine invoked after a send
* request has completed.
* @recv_handler: The completion callback routine invoked for a received
* MAD.
* @context: User specified context associated with the registration.
*/
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
u8 port_num,
enum ib_qp_type qp_type,
struct ib_mad_reg_req *mad_reg_req,
u8 rmpp_version,
ib_mad_send_handler send_handler,
ib_mad_recv_handler recv_handler,
void *context);
enum ib_mad_snoop_flags {
/*IB_MAD_SNOOP_POSTED_SENDS = 1,*/
/*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/
IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2),
/*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/
IB_MAD_SNOOP_RECVS = (1<<4)
/*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/
/*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/
};
/**
* ib_register_mad_snoop - Register to snoop sent and received MADs.
* @device: The device to register with.
* @port_num: The port on the specified device to use.
* @qp_type: Specifies which QP traffic to snoop. Must be either
* IB_QPT_SMI or IB_QPT_GSI.
* @mad_snoop_flags: Specifies information where snooping occurs.
* @send_handler: The callback routine invoked for a snooped send.
* @recv_handler: The callback routine invoked for a snooped receive.
* @context: User specified context associated with the registration.
*/
struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
u8 port_num,
enum ib_qp_type qp_type,
int mad_snoop_flags,
ib_mad_snoop_handler snoop_handler,
ib_mad_recv_handler recv_handler,
void *context);
/**
* ib_unregister_mad_agent - Unregisters a client from using MAD services.
* @mad_agent: Corresponding MAD registration request to deregister.
*
* After invoking this routine, MAD services are no longer usable by the
* client on the associated QP.
*/
int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
/**
* ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
* with the registered client.
* @send_buf: Specifies the information needed to send the MAD(s).
* @bad_send_buf: Specifies the MAD on which an error was encountered. This
* parameter is optional if only a single MAD is posted.
*
* Sent MADs are not guaranteed to complete in the order that they were posted.
*
* If the MAD requires RMPP, the data buffer should contain a single copy
* of the common MAD, RMPP, and class specific headers, followed by the class
* defined data. If the class defined data would not divide evenly into
* RMPP segments, then space must be allocated at the end of the referenced
* buffer for any required padding. To indicate the amount of class defined
* data being transferred, the paylen_newwin field in the RMPP header should
* be set to the size of the class specific header plus the amount of class
* defined data being transferred. The paylen_newwin field should be
* specified in network-byte order.
*/
int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
struct ib_mad_send_buf **bad_send_buf);
/**
* ib_free_recv_mad - Returns data buffers used to receive a MAD.
* @mad_recv_wc: Work completion information for a received MAD.
*
* Clients receiving MADs through their ib_mad_recv_handler must call this
* routine to return the work completion buffers to the access layer.
*/
void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc);
/**
* ib_cancel_mad - Cancels an outstanding send MAD operation.
* @mad_agent: Specifies the registration associated with sent MAD.
* @send_buf: Indicates the MAD to cancel.
*
* MADs will be returned to the user through the corresponding
* ib_mad_send_handler.
*/
void ib_cancel_mad(struct ib_mad_agent *mad_agent,
struct ib_mad_send_buf *send_buf);
/**
* ib_modify_mad - Modifies an outstanding send MAD operation.
* @mad_agent: Specifies the registration associated with sent MAD.
* @send_buf: Indicates the MAD to modify.
* @timeout_ms: New timeout value for sent MAD.
*
* This call will reset the timeout value for a sent MAD to the specified
* value.
*/
int ib_modify_mad(struct ib_mad_agent *mad_agent,
struct ib_mad_send_buf *send_buf, u32 timeout_ms);
/**
* ib_redirect_mad_qp - Registers a QP for MAD services.
* @qp: Reference to a QP that requires MAD services.
* @rmpp_version: If set, indicates that the client will send
* and receive MADs that contain the RMPP header for the given version.
* If set to 0, indicates that RMPP is not used by this client.
* @send_handler: The completion callback routine invoked after a send
* request has completed.
* @recv_handler: The completion callback routine invoked for a received
* MAD.
* @context: User specified context associated with the registration.
*
* Use of this call allows clients to use MAD services, such as RMPP,
* on user-owned QPs. After calling this routine, users may send
* MADs on the specified QP by calling ib_mad_post_send.
*/
struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
u8 rmpp_version,
ib_mad_send_handler send_handler,
ib_mad_recv_handler recv_handler,
void *context);
/**
* ib_process_mad_wc - Processes a work completion associated with a
* MAD sent or received on a redirected QP.
* @mad_agent: Specifies the registered MAD service using the redirected QP.
* @wc: References a work completion associated with a sent or received
* MAD segment.
*
* This routine is used to complete or continue processing on a MAD request.
* If the work completion is associated with a send operation, calling
* this routine is required to continue an RMPP transfer or to wait for a
* corresponding response, if it is a request. If the work completion is
* associated with a receive operation, calling this routine is required to
* process an inbound or outbound RMPP transfer, or to match a response MAD
* with its corresponding request.
*/
int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
struct ib_wc *wc);
/**
* ib_create_send_mad - Allocate and initialize a data buffer and work request
* for sending a MAD.
* @mad_agent: Specifies the registered MAD service to associate with the MAD.
* @remote_qpn: Specifies the QPN of the receiving node.
* @pkey_index: Specifies which PKey the MAD will be sent using. This field
* is valid only if the remote_qpn is QP 1.
* @rmpp_active: Indicates if the send will enable RMPP.
* @hdr_len: Indicates the size of the data header of the MAD. This length
* should include the common MAD header, RMPP header, plus any class
* specific header.
* @data_len: Indicates the size of any user-transferred data. The call will
* automatically adjust the allocated buffer size to account for any
* additional padding that may be necessary.
* @gfp_mask: GFP mask used for the memory allocation.
*
* This routine allocates a MAD for sending. The returned MAD send buffer
* will reference a data buffer usable for sending a MAD, along
* with an initialized work request structure. Users may modify the returned
* MAD data buffer before posting the send.
*
* The returned MAD header, class specific headers, and any padding will be
* cleared. Users are responsible for initializing the common MAD header,
* any class specific header, and MAD data area.
* If @rmpp_active is set, the RMPP header will be initialized for sending.
*/
struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
u32 remote_qpn, u16 pkey_index,
int rmpp_active,
int hdr_len, int data_len,
gfp_t gfp_mask);
/**
* ib_is_mad_class_rmpp - returns whether given management class
* supports RMPP.
* @mgmt_class: management class
*
* This routine returns whether the management class supports RMPP.
*/
int ib_is_mad_class_rmpp(u8 mgmt_class);
/**
* ib_get_mad_data_offset - returns the data offset for a given
* management class.
* @mgmt_class: management class
*
* This routine returns the data offset in the MAD for the management
* class requested.
*/
int ib_get_mad_data_offset(u8 mgmt_class);
/**
* ib_get_rmpp_segment - returns the data buffer for a given RMPP segment.
* @send_buf: Previously allocated send data buffer.
* @seg_num: number of segment to return
*
* This routine returns a pointer to the data buffer of an RMPP MAD.
* Users must provide synchronization to @send_buf around this call.
*/
void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num);
/**
* ib_free_send_mad - Returns data buffers used to send a MAD.
* @send_buf: Previously allocated send data buffer.
*/
void ib_free_send_mad(struct ib_mad_send_buf *send_buf);
#endif /* IB_MAD_H */
diff --git a/sys/ofed/include/rdma/ib_verbs.h b/sys/ofed/include/rdma/ib_verbs.h
index 21fe2ffdb3bb..8363b095b232 100644
--- a/sys/ofed/include/rdma/ib_verbs.h
+++ b/sys/ofed/include/rdma/ib_verbs.h
@@ -1,2898 +1,3213 @@
/*
* Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if !defined(IB_VERBS_H)
#define IB_VERBS_H
#include <linux/types.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/rwsem.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
#include <linux/if_ether.h>
#include <linux/mutex.h>
#include <asm/uaccess.h>
extern struct workqueue_struct *ib_wq;
union ib_gid {
u8 raw[16];
struct {
__be64 subnet_prefix;
__be64 interface_id;
} global;
};
enum rdma_node_type {
/* IB values map to NodeInfo:NodeType. */
RDMA_NODE_IB_CA = 1,
RDMA_NODE_IB_SWITCH,
RDMA_NODE_IB_ROUTER,
RDMA_NODE_RNIC,
RDMA_NODE_MIC
};
enum rdma_transport_type {
RDMA_TRANSPORT_IB,
RDMA_TRANSPORT_IWARP,
RDMA_TRANSPORT_SCIF
};
enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__;
enum rdma_link_layer {
IB_LINK_LAYER_UNSPECIFIED,
IB_LINK_LAYER_INFINIBAND,
IB_LINK_LAYER_ETHERNET,
IB_LINK_LAYER_SCIF
};
enum ib_device_cap_flags {
IB_DEVICE_RESIZE_MAX_WR = 1,
IB_DEVICE_BAD_PKEY_CNTR = (1<<1),
IB_DEVICE_BAD_QKEY_CNTR = (1<<2),
IB_DEVICE_RAW_MULTI = (1<<3),
IB_DEVICE_AUTO_PATH_MIG = (1<<4),
IB_DEVICE_CHANGE_PHY_PORT = (1<<5),
IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6),
IB_DEVICE_CURR_QP_STATE_MOD = (1<<7),
IB_DEVICE_SHUTDOWN_PORT = (1<<8),
IB_DEVICE_INIT_TYPE = (1<<9),
IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10),
IB_DEVICE_SYS_IMAGE_GUID = (1<<11),
IB_DEVICE_RC_RNR_NAK_GEN = (1<<12),
IB_DEVICE_SRQ_RESIZE = (1<<13),
IB_DEVICE_N_NOTIFY_CQ = (1<<14),
IB_DEVICE_LOCAL_DMA_LKEY = (1<<15),
IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */
IB_DEVICE_MEM_WINDOW = (1<<17),
/*
* Devices should set IB_DEVICE_UD_IP_SUM if they support
* insertion of UDP and TCP checksum on outgoing UD IPoIB
* messages and can verify the validity of checksum for
* incoming messages. Setting this flag implies that the
* IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
*/
IB_DEVICE_UD_IP_CSUM = (1<<18),
IB_DEVICE_UD_TSO = (1<<19),
IB_DEVICE_XRC = (1<<20),
IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21),
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
IB_DEVICE_MR_ALLOCATE = (1<<23),
IB_DEVICE_SHARED_MR = (1<<24),
IB_DEVICE_QPG = (1<<25),
IB_DEVICE_UD_RSS = (1<<26),
IB_DEVICE_UD_TSS = (1<<27),
IB_DEVICE_CROSS_CHANNEL = (1<<28),
IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
/*
* Devices can set either IB_DEVICE_MEM_WINDOW_TYPE_2A or
* IB_DEVICE_MEM_WINDOW_TYPE_2B if it supports type 2A or type 2B
* memory windows. It can set neither to indicate it doesn't support
* type 2 windows at all.
*/
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<30),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<31),
IB_DEVICE_SIGNATURE_HANDOVER = (1LL<<32)
};
enum ib_signature_prot_cap {
IB_PROT_T10DIF_TYPE_1 = 1,
IB_PROT_T10DIF_TYPE_2 = 1 << 1,
IB_PROT_T10DIF_TYPE_3 = 1 << 2,
};
enum ib_signature_guard_cap {
IB_GUARD_T10DIF_CRC = 1,
IB_GUARD_T10DIF_CSUM = 1 << 1,
};
enum ib_atomic_cap {
IB_ATOMIC_NONE,
IB_ATOMIC_HCA,
IB_ATOMIC_GLOB
};
enum ib_cq_create_flags {
IB_CQ_CREATE_CROSS_CHANNEL = 1 << 0,
IB_CQ_TIMESTAMP = 1 << 1,
IB_CQ_TIMESTAMP_TO_SYS_TIME = 1 << 2
};
struct ib_device_attr {
u64 fw_ver;
__be64 sys_image_guid;
u64 max_mr_size;
u64 page_size_cap;
u32 vendor_id;
u32 vendor_part_id;
u32 hw_ver;
int max_qp;
int max_qp_wr;
u64 device_cap_flags;
int max_sge;
int max_sge_rd;
int max_cq;
int max_cqe;
int max_mr;
int max_pd;
int max_qp_rd_atom;
int max_ee_rd_atom;
int max_res_rd_atom;
int max_qp_init_rd_atom;
int max_ee_init_rd_atom;
enum ib_atomic_cap atomic_cap;
enum ib_atomic_cap masked_atomic_cap;
int max_ee;
int max_rdd;
int max_mw;
int max_raw_ipv6_qp;
int max_raw_ethy_qp;
int max_mcast_grp;
int max_mcast_qp_attach;
int max_total_mcast_qp_attach;
int max_ah;
int max_fmr;
int max_map_per_fmr;
int max_srq;
int max_srq_wr;
int max_srq_sge;
unsigned int max_fast_reg_page_list_len;
int max_rss_tbl_sz;
u16 max_pkeys;
u8 local_ca_ack_delay;
int comp_mask;
uint64_t timestamp_mask;
uint64_t hca_core_clock;
unsigned int sig_prot_cap;
unsigned int sig_guard_cap;
};
enum ib_device_attr_comp_mask {
IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK = 1ULL << 1,
IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK = 1ULL << 2
};
enum ib_mtu {
IB_MTU_256 = 1,
IB_MTU_512 = 2,
IB_MTU_1024 = 3,
IB_MTU_2048 = 4,
IB_MTU_4096 = 5
};
static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
{
switch (mtu) {
case IB_MTU_256: return 256;
case IB_MTU_512: return 512;
case IB_MTU_1024: return 1024;
case IB_MTU_2048: return 2048;
case IB_MTU_4096: return 4096;
default: return -1;
}
}
enum ib_port_state {
IB_PORT_NOP = 0,
IB_PORT_DOWN = 1,
IB_PORT_INIT = 2,
IB_PORT_ARMED = 3,
IB_PORT_ACTIVE = 4,
IB_PORT_ACTIVE_DEFER = 5,
IB_PORT_DUMMY = -1 /* force enum signed */
};
enum ib_port_cap_flags {
IB_PORT_SM = 1 << 1,
IB_PORT_NOTICE_SUP = 1 << 2,
IB_PORT_TRAP_SUP = 1 << 3,
IB_PORT_OPT_IPD_SUP = 1 << 4,
IB_PORT_AUTO_MIGR_SUP = 1 << 5,
IB_PORT_SL_MAP_SUP = 1 << 6,
IB_PORT_MKEY_NVRAM = 1 << 7,
IB_PORT_PKEY_NVRAM = 1 << 8,
IB_PORT_LED_INFO_SUP = 1 << 9,
IB_PORT_SM_DISABLED = 1 << 10,
IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
IB_PORT_CM_SUP = 1 << 16,
IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
IB_PORT_REINIT_SUP = 1 << 18,
IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
IB_PORT_DR_NOTICE_SUP = 1 << 21,
IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
IB_PORT_BOOT_MGMT_SUP = 1 << 23,
IB_PORT_LINK_LATENCY_SUP = 1 << 24,
IB_PORT_CLIENT_REG_SUP = 1 << 25
};
enum ib_port_width {
IB_WIDTH_1X = 1,
IB_WIDTH_4X = 2,
IB_WIDTH_8X = 4,
IB_WIDTH_12X = 8
};
static inline int ib_width_enum_to_int(enum ib_port_width width)
{
switch (width) {
case IB_WIDTH_1X: return 1;
case IB_WIDTH_4X: return 4;
case IB_WIDTH_8X: return 8;
case IB_WIDTH_12X: return 12;
default: return -1;
}
}
enum ib_port_speed {
IB_SPEED_SDR = 1,
IB_SPEED_DDR = 2,
IB_SPEED_QDR = 4,
IB_SPEED_FDR10 = 8,
IB_SPEED_FDR = 16,
IB_SPEED_EDR = 32
};
struct ib_protocol_stats {
/* TBD... */
};
struct iw_protocol_stats {
u64 ipInReceives;
u64 ipInHdrErrors;
u64 ipInTooBigErrors;
u64 ipInNoRoutes;
u64 ipInAddrErrors;
u64 ipInUnknownProtos;
u64 ipInTruncatedPkts;
u64 ipInDiscards;
u64 ipInDelivers;
u64 ipOutForwDatagrams;
u64 ipOutRequests;
u64 ipOutDiscards;
u64 ipOutNoRoutes;
u64 ipReasmTimeout;
u64 ipReasmReqds;
u64 ipReasmOKs;
u64 ipReasmFails;
u64 ipFragOKs;
u64 ipFragFails;
u64 ipFragCreates;
u64 ipInMcastPkts;
u64 ipOutMcastPkts;
u64 ipInBcastPkts;
u64 ipOutBcastPkts;
u64 tcpRtoAlgorithm;
u64 tcpRtoMin;
u64 tcpRtoMax;
u64 tcpMaxConn;
u64 tcpActiveOpens;
u64 tcpPassiveOpens;
u64 tcpAttemptFails;
u64 tcpEstabResets;
u64 tcpCurrEstab;
u64 tcpInSegs;
u64 tcpOutSegs;
u64 tcpRetransSegs;
u64 tcpInErrs;
u64 tcpOutRsts;
};
union rdma_protocol_stats {
struct ib_protocol_stats ib;
struct iw_protocol_stats iw;
};
+/* Define bits for the various functionality this port needs to be supported by
+ * the core.
+ */
+/* Management 0x00000FFF */
+#define RDMA_CORE_CAP_IB_MAD 0x00000001
+#define RDMA_CORE_CAP_IB_SMI 0x00000002
+#define RDMA_CORE_CAP_IB_CM 0x00000004
+#define RDMA_CORE_CAP_IW_CM 0x00000008
+#define RDMA_CORE_CAP_IB_SA 0x00000010
+#define RDMA_CORE_CAP_OPA_MAD 0x00000020
+
+/* Address format 0x000FF000 */
+#define RDMA_CORE_CAP_AF_IB 0x00001000
+#define RDMA_CORE_CAP_ETH_AH 0x00002000
+#define RDMA_CORE_CAP_OPA_AH 0x00004000
+
+/* Protocol 0xFFF00000 */
+#define RDMA_CORE_CAP_PROT_IB 0x00100000
+#define RDMA_CORE_CAP_PROT_ROCE 0x00200000
+#define RDMA_CORE_CAP_PROT_IWARP 0x00400000
+#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
+#define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000
+#define RDMA_CORE_CAP_PROT_USNIC 0x02000000
+
+#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
+ | RDMA_CORE_CAP_IB_MAD \
+ | RDMA_CORE_CAP_IB_SMI \
+ | RDMA_CORE_CAP_IB_CM \
+ | RDMA_CORE_CAP_IB_SA \
+ | RDMA_CORE_CAP_AF_IB)
+#define RDMA_CORE_PORT_IBA_ROCE (RDMA_CORE_CAP_PROT_ROCE \
+ | RDMA_CORE_CAP_IB_MAD \
+ | RDMA_CORE_CAP_IB_CM \
+ | RDMA_CORE_CAP_AF_IB \
+ | RDMA_CORE_CAP_ETH_AH)
+#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP \
+ (RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \
+ | RDMA_CORE_CAP_IB_MAD \
+ | RDMA_CORE_CAP_IB_CM \
+ | RDMA_CORE_CAP_AF_IB \
+ | RDMA_CORE_CAP_ETH_AH)
+#define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \
+ | RDMA_CORE_CAP_IW_CM)
+#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \
+ | RDMA_CORE_CAP_OPA_MAD)
+
+#define RDMA_CORE_PORT_RAW_PACKET (RDMA_CORE_CAP_PROT_RAW_PACKET)
+
+#define RDMA_CORE_PORT_USNIC (RDMA_CORE_CAP_PROT_USNIC)
+
struct ib_port_attr {
enum ib_port_state state;
enum ib_mtu max_mtu;
enum ib_mtu active_mtu;
int gid_tbl_len;
u32 port_cap_flags;
u32 max_msg_sz;
u32 bad_pkey_cntr;
u32 qkey_viol_cntr;
u16 pkey_tbl_len;
u16 lid;
u16 sm_lid;
u8 lmc;
u8 max_vl_num;
u8 sm_sl;
u8 subnet_timeout;
u8 init_type_reply;
u8 active_width;
u8 active_speed;
u8 phys_state;
};
enum ib_device_modify_flags {
IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 << 0,
IB_DEVICE_MODIFY_NODE_DESC = 1 << 1
};
struct ib_device_modify {
u64 sys_image_guid;
char node_desc[64];
};
enum ib_port_modify_flags {
IB_PORT_SHUTDOWN = 1,
IB_PORT_INIT_TYPE = (1<<2),
IB_PORT_RESET_QKEY_CNTR = (1<<3)
};
struct ib_port_modify {
u32 set_port_cap_mask;
u32 clr_port_cap_mask;
u8 init_type;
};
enum ib_event_type {
IB_EVENT_CQ_ERR,
IB_EVENT_QP_FATAL,
IB_EVENT_QP_REQ_ERR,
IB_EVENT_QP_ACCESS_ERR,
IB_EVENT_COMM_EST,
IB_EVENT_SQ_DRAINED,
IB_EVENT_PATH_MIG,
IB_EVENT_PATH_MIG_ERR,
IB_EVENT_DEVICE_FATAL,
IB_EVENT_PORT_ACTIVE,
IB_EVENT_PORT_ERR,
IB_EVENT_LID_CHANGE,
IB_EVENT_PKEY_CHANGE,
IB_EVENT_SM_CHANGE,
IB_EVENT_SRQ_ERR,
IB_EVENT_SRQ_LIMIT_REACHED,
IB_EVENT_QP_LAST_WQE_REACHED,
IB_EVENT_CLIENT_REREGISTER,
IB_EVENT_GID_CHANGE,
};
struct ib_event {
struct ib_device *device;
union {
struct ib_cq *cq;
struct ib_qp *qp;
struct ib_srq *srq;
u8 port_num;
} element;
enum ib_event_type event;
};
struct ib_event_handler {
struct ib_device *device;
void (*handler)(struct ib_event_handler *, struct ib_event *);
struct list_head list;
};
#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \
do { \
(_ptr)->device = _device; \
(_ptr)->handler = _handler; \
INIT_LIST_HEAD(&(_ptr)->list); \
} while (0)
struct ib_global_route {
union ib_gid dgid;
u32 flow_label;
u8 sgid_index;
u8 hop_limit;
u8 traffic_class;
};
struct ib_grh {
__be32 version_tclass_flow;
__be16 paylen;
u8 next_hdr;
u8 hop_limit;
union ib_gid sgid;
union ib_gid dgid;
};
enum {
IB_MULTICAST_QPN = 0xffffff
};
#define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF)
enum ib_ah_flags {
IB_AH_GRH = 1
};
enum ib_rate {
IB_RATE_PORT_CURRENT = 0,
IB_RATE_2_5_GBPS = 2,
IB_RATE_5_GBPS = 5,
IB_RATE_10_GBPS = 3,
IB_RATE_20_GBPS = 6,
IB_RATE_30_GBPS = 4,
IB_RATE_40_GBPS = 7,
IB_RATE_60_GBPS = 8,
IB_RATE_80_GBPS = 9,
IB_RATE_120_GBPS = 10,
IB_RATE_14_GBPS = 11,
IB_RATE_56_GBPS = 12,
IB_RATE_112_GBPS = 13,
IB_RATE_168_GBPS = 14,
IB_RATE_25_GBPS = 15,
IB_RATE_100_GBPS = 16,
IB_RATE_200_GBPS = 17,
IB_RATE_300_GBPS = 18
};
enum ib_mr_create_flags {
IB_MR_SIGNATURE_EN = 1,
};
/**
* ib_mr_init_attr - Memory region init attributes passed to routine
* ib_create_mr.
* @max_reg_descriptors: max number of registration descriptors that
* may be used with registration work requests.
* @flags: MR creation flags bit mask.
*/
struct ib_mr_init_attr {
int max_reg_descriptors;
u32 flags;
};
/**
* ib_rate_to_mult - Convert the IB rate enum to a multiple of the
* base rate of 2.5 Gbit/sec. For example, IB_RATE_5_GBPS will be
* converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
* @rate: rate to convert.
*/
int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
/**
* ib_rate_to_mbps - Convert the IB rate enum to Mbps.
* For example, IB_RATE_2_5_GBPS will be converted to 2500.
* @rate: rate to convert.
*/
int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__;
struct ib_cq_init_attr {
int cqe;
int comp_vector;
u32 flags;
};
enum ib_signature_type {
IB_SIG_TYPE_T10_DIF,
};
/**
* T10-DIF Signature types
* T10-DIF types are defined by SCSI
* specifications.
*/
enum ib_t10_dif_type {
IB_T10DIF_NONE,
IB_T10DIF_TYPE1,
IB_T10DIF_TYPE2,
IB_T10DIF_TYPE3
};
/**
* Signature T10-DIF block-guard types
* IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
* IB_T10DIF_CSUM: Corresponds to IP checksum rules.
*/
enum ib_t10_dif_bg_type {
IB_T10DIF_CRC,
IB_T10DIF_CSUM
};
/**
* struct ib_t10_dif_domain - Parameters specific for T10-DIF
* domain.
* @type: T10-DIF type (0|1|2|3)
* @bg_type: T10-DIF block guard type (CRC|CSUM)
* @pi_interval: protection information interval.
* @bg: seed of guard computation.
* @app_tag: application tag of guard block
* @ref_tag: initial guard block reference tag.
* @type3_inc_reftag: T10-DIF type 3 does not state
* about the reference tag, it is the user
* choice to increment it or not.
*/
struct ib_t10_dif_domain {
enum ib_t10_dif_type type;
enum ib_t10_dif_bg_type bg_type;
u32 pi_interval;
u16 bg;
u16 app_tag;
u32 ref_tag;
bool type3_inc_reftag;
};
/**
* struct ib_sig_domain - Parameters for signature domain
* @sig_type: specific signauture type
* @sig: union of all signature domain attributes that may
* be used to set domain layout.
*/
struct ib_sig_domain {
enum ib_signature_type sig_type;
union {
struct ib_t10_dif_domain dif;
} sig;
};
/**
* struct ib_sig_attrs - Parameters for signature handover operation
* @check_mask: bitmask for signature byte check (8 bytes)
* @mem: memory domain layout desciptor.
* @wire: wire domain layout desciptor.
*/
struct ib_sig_attrs {
u8 check_mask;
struct ib_sig_domain mem;
struct ib_sig_domain wire;
};
enum ib_sig_err_type {
IB_SIG_BAD_GUARD,
IB_SIG_BAD_REFTAG,
IB_SIG_BAD_APPTAG,
};
/**
* struct ib_sig_err - signature error descriptor
*/
struct ib_sig_err {
enum ib_sig_err_type err_type;
u32 expected;
u32 actual;
u64 sig_err_offset;
u32 key;
};
enum ib_mr_status_check {
IB_MR_CHECK_SIG_STATUS = 1,
};
/**
* struct ib_mr_status - Memory region status container
*
* @fail_status: Bitmask of MR checks status. For each
* failed check a corresponding status bit is set.
* @sig_err: Additional info for IB_MR_CEHCK_SIG_STATUS
* failure.
*/
struct ib_mr_status {
u32 fail_status;
struct ib_sig_err sig_err;
};
/**
* mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
* enum.
* @mult: multiple to convert.
*/
enum ib_rate mult_to_ib_rate(int mult) __attribute_const__;
struct ib_ah_attr {
struct ib_global_route grh;
u16 dlid;
u8 sl;
u8 src_path_bits;
u8 static_rate;
u8 ah_flags;
u8 port_num;
u8 dmac[6];
u16 vlan_id;
};
enum ib_wc_status {
IB_WC_SUCCESS,
IB_WC_LOC_LEN_ERR,
IB_WC_LOC_QP_OP_ERR,
IB_WC_LOC_EEC_OP_ERR,
IB_WC_LOC_PROT_ERR,
IB_WC_WR_FLUSH_ERR,
IB_WC_MW_BIND_ERR,
IB_WC_BAD_RESP_ERR,
IB_WC_LOC_ACCESS_ERR,
IB_WC_REM_INV_REQ_ERR,
IB_WC_REM_ACCESS_ERR,
IB_WC_REM_OP_ERR,
IB_WC_RETRY_EXC_ERR,
IB_WC_RNR_RETRY_EXC_ERR,
IB_WC_LOC_RDD_VIOL_ERR,
IB_WC_REM_INV_RD_REQ_ERR,
IB_WC_REM_ABORT_ERR,
IB_WC_INV_EECN_ERR,
IB_WC_INV_EEC_STATE_ERR,
IB_WC_FATAL_ERR,
IB_WC_RESP_TIMEOUT_ERR,
IB_WC_GENERAL_ERR
};
enum ib_wc_opcode {
IB_WC_SEND,
IB_WC_RDMA_WRITE,
IB_WC_RDMA_READ,
IB_WC_COMP_SWAP,
IB_WC_FETCH_ADD,
IB_WC_BIND_MW,
IB_WC_LSO,
IB_WC_LOCAL_INV,
IB_WC_FAST_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
/*
* Set value of IB_WC_RECV so consumers can test if a completion is a
* receive by testing (opcode & IB_WC_RECV).
*/
IB_WC_RECV = 1 << 7,
IB_WC_RECV_RDMA_WITH_IMM
};
enum ib_wc_flags {
IB_WC_GRH = 1,
IB_WC_WITH_IMM = (1<<1),
IB_WC_WITH_INVALIDATE = (1<<2),
IB_WC_IP_CSUM_OK = (1<<3),
IB_WC_WITH_SL = (1<<4),
IB_WC_WITH_SLID = (1<<5),
IB_WC_WITH_TIMESTAMP = (1<<6),
IB_WC_WITH_SMAC = (1<<7),
IB_WC_WITH_VLAN = (1<<8),
};
struct ib_wc {
u64 wr_id;
enum ib_wc_status status;
enum ib_wc_opcode opcode;
u32 vendor_err;
u32 byte_len;
struct ib_qp *qp;
union {
__be32 imm_data;
u32 invalidate_rkey;
} ex;
u32 src_qp;
int wc_flags;
u16 pkey_index;
u16 slid;
u8 sl;
u8 dlid_path_bits;
u8 port_num; /* valid only for DR SMPs on switches */
int csum_ok;
struct {
uint64_t timestamp; /* timestamp = 0 indicates error*/
} ts;
u8 smac[6];
u16 vlan_id;
};
enum ib_cq_notify_flags {
IB_CQ_SOLICITED = 1 << 0,
IB_CQ_NEXT_COMP = 1 << 1,
IB_CQ_SOLICITED_MASK = IB_CQ_SOLICITED | IB_CQ_NEXT_COMP,
IB_CQ_REPORT_MISSED_EVENTS = 1 << 2,
};
enum ib_srq_type {
IB_SRQT_BASIC,
IB_SRQT_XRC
};
enum ib_srq_attr_mask {
IB_SRQ_MAX_WR = 1 << 0,
IB_SRQ_LIMIT = 1 << 1,
};
struct ib_srq_attr {
u32 max_wr;
u32 max_sge;
u32 srq_limit;
};
struct ib_srq_init_attr {
void (*event_handler)(struct ib_event *, void *);
void *srq_context;
struct ib_srq_attr attr;
enum ib_srq_type srq_type;
union {
struct {
struct ib_xrcd *xrcd;
struct ib_cq *cq;
} xrc;
} ext;
};
struct ib_qp_cap {
u32 max_send_wr;
u32 max_recv_wr;
u32 max_send_sge;
u32 max_recv_sge;
u32 max_inline_data;
u32 qpg_tss_mask_sz;
};
enum ib_sig_type {
IB_SIGNAL_ALL_WR,
IB_SIGNAL_REQ_WR
};
enum ib_qp_type {
/*
* IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
* here (and in that order) since the MAD layer uses them as
* indices into a 2-entry table.
*/
IB_QPT_SMI,
IB_QPT_GSI,
IB_QPT_RC,
IB_QPT_UC,
IB_QPT_UD,
IB_QPT_RAW_IPV6,
IB_QPT_RAW_ETHERTYPE,
IB_QPT_RAW_PACKET = 8,
IB_QPT_XRC_INI = 9,
IB_QPT_XRC_TGT,
IB_QPT_DC_INI,
IB_QPT_MAX,
/* Reserve a range for qp types internal to the low level driver.
* These qp types will not be visible at the IB core layer, so the
* IB_QPT_MAX usages should not be affected in the core layer
*/
IB_QPT_RESERVED1 = 0x1000,
IB_QPT_RESERVED2,
IB_QPT_RESERVED3,
IB_QPT_RESERVED4,
IB_QPT_RESERVED5,
IB_QPT_RESERVED6,
IB_QPT_RESERVED7,
IB_QPT_RESERVED8,
IB_QPT_RESERVED9,
IB_QPT_RESERVED10,
};
enum ib_qp_create_flags {
IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
IB_QP_CREATE_CROSS_CHANNEL = 1 << 2,
IB_QP_CREATE_MANAGED_SEND = 1 << 3,
IB_QP_CREATE_MANAGED_RECV = 1 << 4,
IB_QP_CREATE_NETIF_QP = 1 << 5,
IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
};
enum ib_qpg_type {
IB_QPG_NONE = 0,
IB_QPG_PARENT = (1<<0),
IB_QPG_CHILD_RX = (1<<1),
IB_QPG_CHILD_TX = (1<<2)
};
struct ib_qpg_init_attrib {
u32 tss_child_count;
u32 rss_child_count;
};
struct ib_qp_init_attr {
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
struct ib_srq *srq;
struct ib_xrcd *xrcd; /* XRC TGT QPs only */
struct ib_qp_cap cap;
union {
struct ib_qp *qpg_parent; /* see qpg_type */
struct ib_qpg_init_attrib parent_attrib;
};
enum ib_sig_type sq_sig_type;
enum ib_qp_type qp_type;
enum ib_qp_create_flags create_flags;
enum ib_qpg_type qpg_type;
u8 port_num; /* special QP types only */
};
enum {
IB_DCT_CREATE_FLAG_RCV_INLINE = 1 << 0,
IB_DCT_CREATE_FLAGS_MASK = IB_DCT_CREATE_FLAG_RCV_INLINE,
};
struct ib_dct_init_attr {
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_srq *srq;
u64 dc_key;
u8 port;
u32 access_flags;
u8 min_rnr_timer;
u8 tclass;
u32 flow_label;
enum ib_mtu mtu;
u8 pkey_index;
u8 gid_index;
u8 hop_limit;
u32 create_flags;
};
struct ib_dct_attr {
u64 dc_key;
u8 port;
u32 access_flags;
u8 min_rnr_timer;
u8 tclass;
u32 flow_label;
enum ib_mtu mtu;
u8 pkey_index;
u8 gid_index;
u8 hop_limit;
u32 key_violations;
u8 state;
};
struct ib_qp_open_attr {
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
u32 qp_num;
enum ib_qp_type qp_type;
};
enum ib_rnr_timeout {
IB_RNR_TIMER_655_36 = 0,
IB_RNR_TIMER_000_01 = 1,
IB_RNR_TIMER_000_02 = 2,
IB_RNR_TIMER_000_03 = 3,
IB_RNR_TIMER_000_04 = 4,
IB_RNR_TIMER_000_06 = 5,
IB_RNR_TIMER_000_08 = 6,
IB_RNR_TIMER_000_12 = 7,
IB_RNR_TIMER_000_16 = 8,
IB_RNR_TIMER_000_24 = 9,
IB_RNR_TIMER_000_32 = 10,
IB_RNR_TIMER_000_48 = 11,
IB_RNR_TIMER_000_64 = 12,
IB_RNR_TIMER_000_96 = 13,
IB_RNR_TIMER_001_28 = 14,
IB_RNR_TIMER_001_92 = 15,
IB_RNR_TIMER_002_56 = 16,
IB_RNR_TIMER_003_84 = 17,
IB_RNR_TIMER_005_12 = 18,
IB_RNR_TIMER_007_68 = 19,
IB_RNR_TIMER_010_24 = 20,
IB_RNR_TIMER_015_36 = 21,
IB_RNR_TIMER_020_48 = 22,
IB_RNR_TIMER_030_72 = 23,
IB_RNR_TIMER_040_96 = 24,
IB_RNR_TIMER_061_44 = 25,
IB_RNR_TIMER_081_92 = 26,
IB_RNR_TIMER_122_88 = 27,
IB_RNR_TIMER_163_84 = 28,
IB_RNR_TIMER_245_76 = 29,
IB_RNR_TIMER_327_68 = 30,
IB_RNR_TIMER_491_52 = 31
};
enum ib_qp_attr_mask {
IB_QP_STATE = 1,
IB_QP_CUR_STATE = (1<<1),
IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2),
IB_QP_ACCESS_FLAGS = (1<<3),
IB_QP_PKEY_INDEX = (1<<4),
IB_QP_PORT = (1<<5),
IB_QP_QKEY = (1<<6),
IB_QP_AV = (1<<7),
IB_QP_PATH_MTU = (1<<8),
IB_QP_TIMEOUT = (1<<9),
IB_QP_RETRY_CNT = (1<<10),
IB_QP_RNR_RETRY = (1<<11),
IB_QP_RQ_PSN = (1<<12),
IB_QP_MAX_QP_RD_ATOMIC = (1<<13),
IB_QP_ALT_PATH = (1<<14),
IB_QP_MIN_RNR_TIMER = (1<<15),
IB_QP_SQ_PSN = (1<<16),
IB_QP_MAX_DEST_RD_ATOMIC = (1<<17),
IB_QP_PATH_MIG_STATE = (1<<18),
IB_QP_CAP = (1<<19),
IB_QP_DEST_QPN = (1<<20),
IB_QP_GROUP_RSS = (1<<21),
IB_QP_DC_KEY = (1<<22),
IB_QP_SMAC = (1<<23),
IB_QP_ALT_SMAC = (1<<24),
IB_QP_VID = (1<<25),
IB_QP_ALT_VID = (1<<26)
};
enum ib_qp_state {
IB_QPS_RESET,
IB_QPS_INIT,
IB_QPS_RTR,
IB_QPS_RTS,
IB_QPS_SQD,
IB_QPS_SQE,
IB_QPS_ERR,
IB_QPS_DUMMY = -1 /* force enum signed */
};
enum ib_mig_state {
IB_MIG_MIGRATED,
IB_MIG_REARM,
IB_MIG_ARMED
};
enum ib_mw_type {
IB_MW_TYPE_1 = 1,
IB_MW_TYPE_2 = 2
};
struct ib_qp_attr {
enum ib_qp_state qp_state;
enum ib_qp_state cur_qp_state;
enum ib_mtu path_mtu;
enum ib_mig_state path_mig_state;
u32 qkey;
u32 rq_psn;
u32 sq_psn;
u32 dest_qp_num;
int qp_access_flags;
struct ib_qp_cap cap;
struct ib_ah_attr ah_attr;
struct ib_ah_attr alt_ah_attr;
u16 pkey_index;
u16 alt_pkey_index;
u8 en_sqd_async_notify;
u8 sq_draining;
u8 max_rd_atomic;
u8 max_dest_rd_atomic;
u8 min_rnr_timer;
u8 port_num;
u8 timeout;
u8 retry_cnt;
u8 rnr_retry;
u8 alt_port_num;
u8 alt_timeout;
u8 smac[ETH_ALEN];
u8 alt_smac[ETH_ALEN];
u16 vlan_id;
u16 alt_vlan_id;
};
struct ib_qp_attr_ex {
enum ib_qp_state qp_state;
enum ib_qp_state cur_qp_state;
enum ib_mtu path_mtu;
enum ib_mig_state path_mig_state;
u32 qkey;
u32 rq_psn;
u32 sq_psn;
u32 dest_qp_num;
int qp_access_flags;
struct ib_qp_cap cap;
struct ib_ah_attr ah_attr;
struct ib_ah_attr alt_ah_attr;
u16 pkey_index;
u16 alt_pkey_index;
u8 en_sqd_async_notify;
u8 sq_draining;
u8 max_rd_atomic;
u8 max_dest_rd_atomic;
u8 min_rnr_timer;
u8 port_num;
u8 timeout;
u8 retry_cnt;
u8 rnr_retry;
u8 alt_port_num;
u8 alt_timeout;
u64 dct_key;
};
enum ib_wr_opcode {
IB_WR_RDMA_WRITE,
IB_WR_RDMA_WRITE_WITH_IMM,
IB_WR_SEND,
IB_WR_SEND_WITH_IMM,
IB_WR_RDMA_READ,
IB_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD,
IB_WR_LSO,
IB_WR_SEND_WITH_INV,
IB_WR_RDMA_READ_WITH_INV,
IB_WR_LOCAL_INV,
IB_WR_FAST_REG_MR,
IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
IB_WR_BIND_MW,
IB_WR_REG_SIG_MR,
/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
*/
IB_WR_RESERVED1 = 0xf0,
IB_WR_RESERVED2,
IB_WR_RESERVED3,
IB_WR_RESERVED4,
IB_WR_RESERVED5,
IB_WR_RESERVED6,
IB_WR_RESERVED7,
IB_WR_RESERVED8,
IB_WR_RESERVED9,
IB_WR_RESERVED10,
};
enum ib_send_flags {
IB_SEND_FENCE = 1,
IB_SEND_SIGNALED = (1<<1),
IB_SEND_SOLICITED = (1<<2),
IB_SEND_INLINE = (1<<3),
IB_SEND_IP_CSUM = (1<<4),
/* reserve bits 26-31 for low level drivers' internal use */
IB_SEND_RESERVED_START = (1 << 26),
IB_SEND_RESERVED_END = (1 << 31),
IB_SEND_UMR_UNREG = (1<<5)
};
struct ib_sge {
u64 addr;
u32 length;
u32 lkey;
};
struct ib_fast_reg_page_list {
struct ib_device *device;
u64 *page_list;
unsigned int max_page_list_len;
};
/**
* struct ib_mw_bind_info - Parameters for a memory window bind operation.
* @mr: A memory region to bind the memory window to.
* @addr: The address where the memory window should begin.
* @length: The length of the memory window, in bytes.
* @mw_access_flags: Access flags from enum ib_access_flags for the window.
*
* This struct contains the shared parameters for type 1 and type 2
* memory window bind operations.
*/
struct ib_mw_bind_info {
struct ib_mr *mr;
u64 addr;
u64 length;
int mw_access_flags;
};
struct ib_send_wr {
struct ib_send_wr *next;
u64 wr_id;
struct ib_sge *sg_list;
int num_sge;
enum ib_wr_opcode opcode;
int send_flags;
union {
__be32 imm_data;
u32 invalidate_rkey;
} ex;
union {
struct {
u64 remote_addr;
u32 rkey;
} rdma;
struct {
u64 remote_addr;
u64 compare_add;
u64 swap;
u64 compare_add_mask;
u64 swap_mask;
u32 rkey;
} atomic;
struct {
struct ib_ah *ah;
void *header;
int hlen;
int mss;
u32 remote_qpn;
u32 remote_qkey;
u16 pkey_index; /* valid for GSI only */
u8 port_num; /* valid for DR SMPs on switch only */
} ud;
struct {
u64 iova_start;
struct ib_fast_reg_page_list *page_list;
unsigned int page_shift;
unsigned int page_list_len;
u32 length;
int access_flags;
u32 rkey;
} fast_reg;
struct {
int npages;
int access_flags;
u32 mkey;
struct ib_pd *pd;
u64 virt_addr;
u64 length;
int page_shift;
} umr;
struct {
struct ib_mw *mw;
/* The new rkey for the memory window. */
u32 rkey;
struct ib_mw_bind_info bind_info;
} bind_mw;
struct {
struct ib_sig_attrs *sig_attrs;
struct ib_mr *sig_mr;
int access_flags;
struct ib_sge *prot;
} sig_handover;
} wr;
u32 xrc_remote_srq_num; /* XRC TGT QPs only */
};
struct ib_recv_wr {
struct ib_recv_wr *next;
u64 wr_id;
struct ib_sge *sg_list;
int num_sge;
};
enum ib_access_flags {
IB_ACCESS_LOCAL_WRITE = 1,
IB_ACCESS_REMOTE_WRITE = (1<<1),
IB_ACCESS_REMOTE_READ = (1<<2),
IB_ACCESS_REMOTE_ATOMIC = (1<<3),
IB_ACCESS_MW_BIND = (1<<4),
IB_ACCESS_ALLOCATE_MR = (1<<5),
IB_ZERO_BASED = (1<<13)
};
struct ib_phys_buf {
u64 addr;
u64 size;
};
struct ib_mr_attr {
struct ib_pd *pd;
u64 device_virt_addr;
u64 size;
int mr_access_flags;
u32 lkey;
u32 rkey;
};
enum ib_mr_rereg_flags {
IB_MR_REREG_TRANS = 1,
IB_MR_REREG_PD = (1<<1),
IB_MR_REREG_ACCESS = (1<<2)
};
/**
* struct ib_mw_bind - Parameters for a type 1 memory window bind operation.
* @wr_id: Work request id.
* @send_flags: Flags from ib_send_flags enum.
* @bind_info: More parameters of the bind operation.
*/
struct ib_mw_bind {
u64 wr_id;
int send_flags;
struct ib_mw_bind_info bind_info;
};
struct ib_fmr_attr {
int max_pages;
int max_maps;
u8 page_shift;
};
struct ib_ucontext {
struct ib_device *device;
struct list_head pd_list;
struct list_head mr_list;
struct list_head mw_list;
struct list_head cq_list;
struct list_head qp_list;
struct list_head srq_list;
struct list_head ah_list;
struct list_head xrcd_list;
struct list_head rule_list;
struct list_head dct_list;
int closing;
void *peer_mem_private_data;
char *peer_mem_name;
};
struct ib_uobject {
u64 user_handle; /* handle given to us by userspace */
struct ib_ucontext *context; /* associated user context */
void *object; /* containing object */
struct list_head list; /* link to context's list */
int id; /* index into kernel idr */
struct kref ref;
struct rw_semaphore mutex; /* protects .live */
int live;
};
struct ib_udata;
struct ib_udata_ops {
int (*copy_from)(void *dest, struct ib_udata *udata,
size_t len);
int (*copy_to)(struct ib_udata *udata, void *src,
size_t len);
};
struct ib_udata {
struct ib_udata_ops *ops;
void __user *inbuf;
void __user *outbuf;
size_t inlen;
size_t outlen;
};
struct ib_pd {
struct ib_device *device;
struct ib_uobject *uobject;
atomic_t usecnt; /* count all resources */
};
struct ib_xrcd {
struct ib_device *device;
atomic_t usecnt; /* count all exposed resources */
struct inode *inode;
struct mutex tgt_qp_mutex;
struct list_head tgt_qp_list;
};
struct ib_ah {
struct ib_device *device;
struct ib_pd *pd;
struct ib_uobject *uobject;
};
enum ib_cq_attr_mask {
IB_CQ_MODERATION = (1 << 0),
IB_CQ_CAP_FLAGS = (1 << 1)
};
enum ib_cq_cap_flags {
IB_CQ_IGNORE_OVERRUN = (1 << 0)
};
struct ib_cq_attr {
struct {
u16 cq_count;
u16 cq_period;
} moderation;
u32 cq_cap_flags;
};
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
struct ib_cq {
struct ib_device *device;
struct ib_uobject *uobject;
ib_comp_handler comp_handler;
void (*event_handler)(struct ib_event *, void *);
void *cq_context;
int cqe;
atomic_t usecnt; /* count number of work queues */
};
struct ib_srq {
struct ib_device *device;
struct ib_pd *pd;
struct ib_uobject *uobject;
void (*event_handler)(struct ib_event *, void *);
void *srq_context;
enum ib_srq_type srq_type;
atomic_t usecnt;
union {
struct {
struct ib_xrcd *xrcd;
struct ib_cq *cq;
u32 srq_num;
} xrc;
} ext;
};
struct ib_qp {
struct ib_device *device;
struct ib_pd *pd;
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
struct ib_srq *srq;
struct ib_xrcd *xrcd; /* XRC TGT QPs only */
struct list_head xrcd_list;
/* count times opened, mcast attaches, flow attaches */
atomic_t usecnt;
struct list_head open_list;
struct ib_qp *real_qp;
struct ib_uobject *uobject;
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
u32 qp_num;
enum ib_qp_type qp_type;
enum ib_qpg_type qpg_type;
u8 port_num;
};
struct ib_dct {
struct ib_device *device;
struct ib_uobject *uobject;
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_srq *srq;
u32 dct_num;
};
struct ib_mr {
struct ib_device *device;
struct ib_pd *pd;
struct ib_uobject *uobject;
u32 lkey;
u32 rkey;
atomic_t usecnt; /* count number of MWs */
};
struct ib_mw {
struct ib_device *device;
struct ib_pd *pd;
struct ib_uobject *uobject;
u32 rkey;
enum ib_mw_type type;
};
struct ib_fmr {
struct ib_device *device;
struct ib_pd *pd;
struct list_head list;
u32 lkey;
u32 rkey;
};
/* Supported steering options */
enum ib_flow_attr_type {
/* steering according to rule specifications */
IB_FLOW_ATTR_NORMAL = 0x0,
/* default unicast and multicast rule -
* receive all Eth traffic which isn't steered to any QP
*/
IB_FLOW_ATTR_ALL_DEFAULT = 0x1,
/* default multicast rule -
* receive all Eth multicast traffic which isn't steered to any QP
*/
IB_FLOW_ATTR_MC_DEFAULT = 0x2,
/* sniffer rule - receive all port traffic */
IB_FLOW_ATTR_SNIFFER = 0x3
};
/* Supported steering header types */
enum ib_flow_spec_type {
/* L2 headers*/
IB_FLOW_SPEC_ETH = 0x20,
IB_FLOW_SPEC_IB = 0x21,
/* L3 header*/
IB_FLOW_SPEC_IPV4 = 0x30,
/* L4 headers*/
IB_FLOW_SPEC_TCP = 0x40,
IB_FLOW_SPEC_UDP = 0x41
};
#define IB_FLOW_SPEC_SUPPORT_LAYERS 4
/* Flow steering rule priority is set according to it's domain.
* Lower domain value means higher priority.
*/
enum ib_flow_domain {
IB_FLOW_DOMAIN_USER,
IB_FLOW_DOMAIN_ETHTOOL,
IB_FLOW_DOMAIN_RFS,
IB_FLOW_DOMAIN_NIC,
IB_FLOW_DOMAIN_NUM /* Must be last */
};
enum ib_flow_flags {
IB_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK = 1
};
struct ib_flow_eth_filter {
u8 dst_mac[6];
u8 src_mac[6];
__be16 ether_type;
__be16 vlan_tag;
};
struct ib_flow_spec_eth {
enum ib_flow_spec_type type;
u16 size;
struct ib_flow_eth_filter val;
struct ib_flow_eth_filter mask;
};
struct ib_flow_ib_filter {
__be32 l3_type_qpn;
u8 dst_gid[16];
};
struct ib_flow_spec_ib {
enum ib_flow_spec_type type;
u16 size;
struct ib_flow_ib_filter val;
struct ib_flow_ib_filter mask;
};
struct ib_flow_ipv4_filter {
__be32 src_ip;
__be32 dst_ip;
};
struct ib_flow_spec_ipv4 {
enum ib_flow_spec_type type;
u16 size;
struct ib_flow_ipv4_filter val;
struct ib_flow_ipv4_filter mask;
};
struct ib_flow_tcp_udp_filter {
__be16 dst_port;
__be16 src_port;
};
struct ib_flow_spec_tcp_udp {
enum ib_flow_spec_type type;
u16 size;
struct ib_flow_tcp_udp_filter val;
struct ib_flow_tcp_udp_filter mask;
};
union ib_flow_spec {
struct {
enum ib_flow_spec_type type;
u16 size;
};
struct ib_flow_spec_ib ib;
struct ib_flow_spec_eth eth;
struct ib_flow_spec_ipv4 ipv4;
struct ib_flow_spec_tcp_udp tcp_udp;
};
struct ib_flow_attr {
enum ib_flow_attr_type type;
u16 size;
u16 priority;
u8 num_of_specs;
u8 port;
u32 flags;
/* Following are the optional layers according to user request
* struct ib_flow_spec_xxx
* struct ib_flow_spec_yyy
*/
};
struct ib_flow {
struct ib_qp *qp;
struct ib_uobject *uobject;
};
struct ib_mad;
struct ib_grh;
enum ib_process_mad_flags {
IB_MAD_IGNORE_MKEY = 1,
IB_MAD_IGNORE_BKEY = 2,
IB_MAD_IGNORE_ALL = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY
};
enum ib_mad_result {
IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */
IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */
IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */
IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */
};
#define IB_DEVICE_NAME_MAX 64
struct ib_cache {
rwlock_t lock;
struct ib_event_handler event_handler;
struct ib_pkey_cache **pkey_cache;
struct ib_gid_cache **gid_cache;
u8 *lmc_cache;
};
enum verbs_values_mask {
IBV_VALUES_HW_CLOCK = 1 << 0
};
struct ib_device_values {
int values_mask;
uint64_t hwclock;
};
struct ib_dma_mapping_ops {
int (*mapping_error)(struct ib_device *dev,
u64 dma_addr);
u64 (*map_single)(struct ib_device *dev,
void *ptr, size_t size,
enum dma_data_direction direction);
void (*unmap_single)(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction);
u64 (*map_page)(struct ib_device *dev,
struct page *page, unsigned long offset,
size_t size,
enum dma_data_direction direction);
void (*unmap_page)(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction);
int (*map_sg)(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction);
void (*unmap_sg)(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction);
u64 (*dma_address)(struct ib_device *dev,
struct scatterlist *sg);
unsigned int (*dma_len)(struct ib_device *dev,
struct scatterlist *sg);
void (*sync_single_for_cpu)(struct ib_device *dev,
u64 dma_handle,
size_t size,
enum dma_data_direction dir);
void (*sync_single_for_device)(struct ib_device *dev,
u64 dma_handle,
size_t size,
enum dma_data_direction dir);
void *(*alloc_coherent)(struct ib_device *dev,
size_t size,
u64 *dma_handle,
gfp_t flag);
void (*free_coherent)(struct ib_device *dev,
size_t size, void *cpu_addr,
u64 dma_handle);
};
struct iw_cm_verbs;
+
+struct ib_port_immutable {
+ int pkey_tbl_len;
+ int gid_tbl_len;
+ u32 core_cap_flags;
+ u32 max_mad_size;
+};
+
struct ib_exp_device_attr;
struct ib_exp_qp_init_attr;
struct ib_device {
struct device *dma_device;
char name[IB_DEVICE_NAME_MAX];
struct list_head event_handler_list;
spinlock_t event_handler_lock;
spinlock_t client_data_lock;
struct list_head core_list;
struct list_head client_data_list;
struct ib_cache cache;
- int *pkey_tbl_len;
- int *gid_tbl_len;
+ /**
+ * port_immutable is indexed by port number
+ */
+ struct ib_port_immutable *port_immutable;
int num_comp_vectors;
struct iw_cm_verbs *iwcm;
int (*get_protocol_stats)(struct ib_device *device,
union rdma_protocol_stats *stats);
int (*query_device)(struct ib_device *device,
struct ib_device_attr *device_attr);
int (*query_port)(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr);
enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
u8 port_num);
/* When calling get_netdev, the HW vendor's driver should return the
* net device of device @device at port @port_num. The function
* is called in rtnl_lock. The HW vendor's device driver must guarantee
* to return NULL before the net device has reached
* NETDEV_UNREGISTER_FINAL state.
*/
struct net_device *(*get_netdev)(struct ib_device *device,
u8 port_num);
int (*query_gid)(struct ib_device *device,
u8 port_num, int index,
union ib_gid *gid);
int (*query_pkey)(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey);
int (*modify_device)(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify);
int (*modify_port)(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify);
struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device,
struct ib_udata *udata);
int (*dealloc_ucontext)(struct ib_ucontext *context);
int (*mmap)(struct ib_ucontext *context,
struct vm_area_struct *vma);
struct ib_pd * (*alloc_pd)(struct ib_device *device,
struct ib_ucontext *context,
struct ib_udata *udata);
int (*dealloc_pd)(struct ib_pd *pd);
struct ib_ah * (*create_ah)(struct ib_pd *pd,
struct ib_ah_attr *ah_attr);
int (*modify_ah)(struct ib_ah *ah,
struct ib_ah_attr *ah_attr);
int (*query_ah)(struct ib_ah *ah,
struct ib_ah_attr *ah_attr);
int (*destroy_ah)(struct ib_ah *ah);
struct ib_srq * (*create_srq)(struct ib_pd *pd,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
int (*modify_srq)(struct ib_srq *srq,
struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
int (*query_srq)(struct ib_srq *srq,
struct ib_srq_attr *srq_attr);
int (*destroy_srq)(struct ib_srq *srq);
int (*post_srq_recv)(struct ib_srq *srq,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr);
struct ib_qp * (*create_qp)(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr,
struct ib_udata *udata);
int (*modify_qp)(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_udata *udata);
int (*query_qp)(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
int (*destroy_qp)(struct ib_qp *qp);
int (*post_send)(struct ib_qp *qp,
struct ib_send_wr *send_wr,
struct ib_send_wr **bad_send_wr);
int (*post_recv)(struct ib_qp *qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr);
struct ib_cq * (*create_cq)(struct ib_device *device,
struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq,
struct ib_cq_attr *cq_attr,
int cq_attr_mask);
int (*destroy_cq)(struct ib_cq *cq);
int (*resize_cq)(struct ib_cq *cq, int cqe,
struct ib_udata *udata);
int (*poll_cq)(struct ib_cq *cq, int num_entries,
struct ib_wc *wc);
int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
int (*req_notify_cq)(struct ib_cq *cq,
enum ib_cq_notify_flags flags);
int (*req_ncomp_notif)(struct ib_cq *cq,
int wc_cnt);
struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
int mr_access_flags);
struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd,
struct ib_phys_buf *phys_buf_array,
int num_phys_buf,
int mr_access_flags,
u64 *iova_start);
struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
u64 start, u64 length,
u64 virt_addr,
int mr_access_flags,
struct ib_udata *udata,
int mr_id);
int (*query_mr)(struct ib_mr *mr,
struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
int (*destroy_mr)(struct ib_mr *mr);
struct ib_mr * (*create_mr)(struct ib_pd *pd,
struct ib_mr_init_attr *mr_init_attr);
struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd,
int max_page_list_len);
struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
int page_list_len);
void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
int (*rereg_phys_mr)(struct ib_mr *mr,
int mr_rereg_mask,
struct ib_pd *pd,
struct ib_phys_buf *phys_buf_array,
int num_phys_buf,
int mr_access_flags,
u64 *iova_start);
struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
enum ib_mw_type type);
int (*bind_mw)(struct ib_qp *qp,
struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
int (*dealloc_mw)(struct ib_mw *mw);
struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
int mr_access_flags,
struct ib_fmr_attr *fmr_attr);
int (*map_phys_fmr)(struct ib_fmr *fmr,
u64 *page_list, int list_len,
u64 iova);
int (*unmap_fmr)(struct list_head *fmr_list);
int (*dealloc_fmr)(struct ib_fmr *fmr);
int (*attach_mcast)(struct ib_qp *qp,
union ib_gid *gid,
u16 lid);
int (*detach_mcast)(struct ib_qp *qp,
union ib_gid *gid,
u16 lid);
int (*process_mad)(struct ib_device *device,
int process_mad_flags,
u8 port_num,
struct ib_wc *in_wc,
struct ib_grh *in_grh,
struct ib_mad *in_mad,
struct ib_mad *out_mad);
struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device,
struct ib_ucontext *ucontext,
struct ib_udata *udata);
int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
struct ib_flow * (*create_flow)(struct ib_qp *qp,
struct ib_flow_attr
*flow_attr,
int domain);
int (*destroy_flow)(struct ib_flow *flow_id);
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
unsigned long (*get_unmapped_area)(struct file *file,
unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags);
int (*ioctl)(struct ib_ucontext *context,
unsigned int cmd,
unsigned long arg);
int (*query_values)(struct ib_device *device,
int q_values,
struct ib_device_values *values);
struct ib_dma_mapping_ops *dma_ops;
struct module *owner;
struct device dev;
struct kobject *ports_parent;
struct list_head port_list;
enum {
IB_DEV_UNINITIALIZED,
IB_DEV_REGISTERED,
IB_DEV_UNREGISTERED
} reg_state;
int uverbs_abi_ver;
u64 uverbs_cmd_mask;
u64 uverbs_ex_cmd_mask;
char node_desc[64];
__be64 node_guid;
u32 local_dma_lkey;
u8 node_type;
u8 phys_port_cnt;
int cmd_perf;
u64 cmd_avg;
u32 cmd_n;
spinlock_t cmd_perf_lock;
+
+ /**
+ * The following mandatory functions are used only at device
+ * registration. Keep functions such as these at the end of this
+ * structure to avoid cache line misses when accessing struct ib_device
+ * in fast paths.
+ */
+ int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *);
+
/*
* Experimental data and functions
*/
int (*exp_query_device)(struct ib_device *device,
struct ib_exp_device_attr *device_attr);
struct ib_qp * (*exp_create_qp)(struct ib_pd *pd,
struct ib_exp_qp_init_attr *qp_init_attr,
struct ib_udata *udata);
struct ib_dct * (*exp_create_dct)(struct ib_pd *pd,
struct ib_dct_init_attr *attr,
struct ib_udata *udata);
int (*exp_destroy_dct)(struct ib_dct *dct);
int (*exp_query_dct)(struct ib_dct *dct, struct ib_dct_attr *attr);
u64 uverbs_exp_cmd_mask;
};
struct ib_client {
char *name;
void (*add) (struct ib_device *);
void (*remove)(struct ib_device *);
struct list_head list;
};
struct ib_device *ib_alloc_device(size_t size);
void ib_dealloc_device(struct ib_device *device);
int ib_register_device(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
void ib_unregister_device(struct ib_device *device);
int ib_register_client (struct ib_client *client);
void ib_unregister_client(struct ib_client *client);
void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data);
static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
{
return udata->ops->copy_from(dest, udata, len);
}
static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
{
return udata->ops->copy_to(udata, src, len);
}
/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
* the given QP state transition.
* @cur_state: Current QP state
* @next_state: Next QP state
* @type: QP type
* @mask: Mask of supplied QP attributes
* @ll : link layer of port
*
* This function is a helper function that a low-level driver's
* modify_qp method can use to validate the consumer's input. It
* checks that cur_state and next_state are valid QP states, that a
* transition from cur_state to next_state is allowed by the IB spec,
* and that the attribute mask supplied is allowed for the transition.
*/
int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
enum ib_qp_type type, enum ib_qp_attr_mask mask,
enum rdma_link_layer ll);
int ib_register_event_handler (struct ib_event_handler *event_handler);
int ib_unregister_event_handler(struct ib_event_handler *event_handler);
void ib_dispatch_event(struct ib_event *event);
int ib_query_device(struct ib_device *device,
struct ib_device_attr *device_attr);
int ib_query_port(struct ib_device *device,
u8 port_num, struct ib_port_attr *port_attr);
enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
u8 port_num);
+static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
+}
+
+static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags &
+ (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+}
+
+static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+}
+
+static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
+}
+
+static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP;
+}
+
+static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
+{
+ return rdma_protocol_ib(device, port_num) ||
+ rdma_protocol_roce(device, port_num);
+}
+
+/**
+ * rdma_cap_ib_mad - Check if the port of a device supports Infiniband
+ * Management Datagrams.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Management Datagrams (MAD) are a required part of the InfiniBand
+ * specification and are supported on all InfiniBand devices. A slightly
+ * extended version are also supported on OPA interfaces.
+ *
+ * Return: true if the port supports sending/receiving of MAD packets.
+ */
+static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_MAD;
+}
+
+/**
+ * rdma_cap_opa_mad - Check if the port of device provides support for OPA
+ * Management Datagrams.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Intel OmniPath devices extend and/or replace the InfiniBand Management
+ * datagrams with their own versions. These OPA MADs share many but not all of
+ * the characteristics of InfiniBand MADs.
+ *
+ * OPA MADs differ in the following ways:
+ *
+ * 1) MADs are variable size up to 2K
+ * IBTA defined MADs remain fixed at 256 bytes
+ * 2) OPA SMPs must carry valid PKeys
+ * 3) OPA SMP packets are a different format
+ *
+ * Return: true if the port supports OPA MAD packet formats.
+ */
+static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
+{
+ return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_OPA_MAD)
+ == RDMA_CORE_CAP_OPA_MAD;
+}
+
+/**
+ * rdma_cap_ib_smi - Check if the port of a device provides an Infiniband
+ * Subnet Management Agent (SMA) on the Subnet Management Interface (SMI).
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Each InfiniBand node is required to provide a Subnet Management Agent
+ * that the subnet manager can access. Prior to the fabric being fully
+ * configured by the subnet manager, the SMA is accessed via a well known
+ * interface called the Subnet Management Interface (SMI). This interface
+ * uses directed route packets to communicate with the SM to get around the
+ * chicken and egg problem of the SM needing to know what's on the fabric
+ * in order to configure the fabric, and needing to configure the fabric in
+ * order to send packets to the devices on the fabric. These directed
+ * route packets do not need the fabric fully configured in order to reach
+ * their destination. The SMI is the only method allowed to send
+ * directed route packets on an InfiniBand fabric.
+ *
+ * Return: true if the port provides an SMI.
+ */
+static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SMI;
+}
+
+/**
+ * rdma_cap_ib_cm - Check if the port of device has the capability Infiniband
+ * Communication Manager.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * The InfiniBand Communication Manager is one of many pre-defined General
+ * Service Agents (GSA) that are accessed via the General Service
+ * Interface (GSI). It's role is to facilitate establishment of connections
+ * between nodes as well as other management related tasks for established
+ * connections.
+ *
+ * Return: true if the port supports an IB CM (this does not guarantee that
+ * a CM is actually running however).
+ */
+static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_CM;
+}
+
+/**
+ * rdma_cap_iw_cm - Check if the port of device has the capability IWARP
+ * Communication Manager.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Similar to above, but specific to iWARP connections which have a different
+ * managment protocol than InfiniBand.
+ *
+ * Return: true if the port supports an iWARP CM (this does not guarantee that
+ * a CM is actually running however).
+ */
+static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IW_CM;
+}
+
+/**
+ * rdma_cap_ib_sa - Check if the port of device has the capability Infiniband
+ * Subnet Administration.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * An InfiniBand Subnet Administration (SA) service is a pre-defined General
+ * Service Agent (GSA) provided by the Subnet Manager (SM). On InfiniBand
+ * fabrics, devices should resolve routes to other hosts by contacting the
+ * SA to query the proper route.
+ *
+ * Return: true if the port should act as a client to the fabric Subnet
+ * Administration interface. This does not imply that the SA service is
+ * running locally.
+ */
+static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SA;
+}
+
+/**
+ * rdma_cap_ib_mcast - Check if the port of device has the capability Infiniband
+ * Multicast.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * InfiniBand multicast registration is more complex than normal IPv4 or
+ * IPv6 multicast registration. Each Host Channel Adapter must register
+ * with the Subnet Manager when it wishes to join a multicast group. It
+ * should do so only once regardless of how many queue pairs it subscribes
+ * to this group. And it should leave the group only after all queue pairs
+ * attached to the group have been detached.
+ *
+ * Return: true if the port must undertake the additional adminstrative
+ * overhead of registering/unregistering with the SM and tracking of the
+ * total number of queue pairs attached to the multicast group.
+ */
+static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num)
+{
+ return rdma_cap_ib_sa(device, port_num);
+}
+
+/**
+ * rdma_cap_af_ib - Check if the port of device has the capability
+ * Native Infiniband Address.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * InfiniBand addressing uses a port's GUID + Subnet Prefix to make a default
+ * GID. RoCE uses a different mechanism, but still generates a GID via
+ * a prescribed mechanism and port specific data.
+ *
+ * Return: true if the port uses a GID address to identify devices on the
+ * network.
+ */
+static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_AF_IB;
+}
+
+/**
+ * rdma_cap_eth_ah - Check if the port of device has the capability
+ * Ethernet Address Handle.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * RoCE is InfiniBand over Ethernet, and it uses a well defined technique
+ * to fabricate GIDs over Ethernet/IP specific addresses native to the
+ * port. Normally, packet headers are generated by the sending host
+ * adapter, but when sending connectionless datagrams, we must manually
+ * inject the proper headers for the fabric we are communicating over.
+ *
+ * Return: true if we are running as a RoCE port and must force the
+ * addition of a Global Route Header built from our Ethernet Address
+ * Handle into our header list for connectionless packets.
+ */
+static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH;
+}
+
+/**
+ * rdma_max_mad_size - Return the max MAD size required by this RDMA Port.
+ *
+ * @device: Device
+ * @port_num: Port number
+ *
+ * This MAD size includes the MAD headers and MAD payload. No other headers
+ * are included.
+ *
+ * Return the max MAD size required by the Port. Will return 0 if the port
+ * does not support MADs
+ */
+static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].max_mad_size;
+}
+
+/*
+ * Check if the device supports READ W/ INVALIDATE.
+ */
+static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
+{
+ /*
+ * iWarp drivers must support READ W/ INVALIDATE. No other protocol
+ * has support for it yet.
+ */
+ return rdma_protocol_iwarp(dev, port_num);
+}
+
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid);
int ib_query_pkey(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey);
int ib_modify_device(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify);
int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify);
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
u8 *port_num, u16 *index);
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index);
/**
* ib_alloc_pd - Allocates an unused protection domain.
* @device: The device on which to allocate the protection domain.
*
* A protection domain object provides an association between QPs, shared
* receive queues, address handles, memory regions, and memory windows.
*/
struct ib_pd *ib_alloc_pd(struct ib_device *device);
/**
* ib_dealloc_pd - Deallocates a protection domain.
* @pd: The protection domain to deallocate.
*/
int ib_dealloc_pd(struct ib_pd *pd);
/**
* ib_create_ah - Creates an address handle for the given address vector.
* @pd: The protection domain associated with the address handle.
* @ah_attr: The attributes of the address vector.
*
* The address handle is used to reference a local or global destination
* in all UD QP post sends.
*/
struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
/**
* ib_init_ah_from_wc - Initializes address handle attributes from a
* work completion.
* @device: Device on which the received message arrived.
* @port_num: Port on which the received message arrived.
* @wc: Work completion associated with the received message.
* @grh: References the received global route header. This parameter is
* ignored unless the work completion indicates that the GRH is valid.
* @ah_attr: Returned attributes that can be used when creating an address
* handle for replying to the message.
*/
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
struct ib_grh *grh, struct ib_ah_attr *ah_attr);
/**
* ib_create_ah_from_wc - Creates an address handle associated with the
* sender of the specified work completion.
* @pd: The protection domain associated with the address handle.
* @wc: Work completion information associated with a received message.
* @grh: References the received global route header. This parameter is
* ignored unless the work completion indicates that the GRH is valid.
* @port_num: The outbound port number to associate with the address.
*
* The address handle is used to reference a local or global destination
* in all UD QP post sends.
*/
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
struct ib_grh *grh, u8 port_num);
/**
* ib_modify_ah - Modifies the address vector associated with an address
* handle.
* @ah: The address handle to modify.
* @ah_attr: The new address vector attributes to associate with the
* address handle.
*/
int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
/**
* ib_query_ah - Queries the address vector associated with an address
* handle.
* @ah: The address handle to query.
* @ah_attr: The address vector attributes associated with the address
* handle.
*/
int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
/**
* ib_destroy_ah - Destroys an address handle.
* @ah: The address handle to destroy.
*/
int ib_destroy_ah(struct ib_ah *ah);
/**
* ib_create_srq - Creates a SRQ associated with the specified protection
* domain.
* @pd: The protection domain associated with the SRQ.
* @srq_init_attr: A list of initial attributes required to create the
* SRQ. If SRQ creation succeeds, then the attributes are updated to
* the actual capabilities of the created SRQ.
*
* srq_attr->max_wr and srq_attr->max_sge are read the determine the
* requested size of the SRQ, and set to the actual values allocated
* on return. If ib_create_srq() succeeds, then max_wr and max_sge
* will always be at least as large as the requested values.
*/
struct ib_srq *ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *srq_init_attr);
/**
* ib_modify_srq - Modifies the attributes for the specified SRQ.
* @srq: The SRQ to modify.
* @srq_attr: On input, specifies the SRQ attributes to modify. On output,
* the current values of selected SRQ attributes are returned.
* @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ
* are being modified.
*
* The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or
* IB_SRQ_LIMIT to set the SRQ's limit and request notification when
* the number of receives queued drops below the limit.
*/
int ib_modify_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask);
/**
* ib_query_srq - Returns the attribute list and current values for the
* specified SRQ.
* @srq: The SRQ to query.
* @srq_attr: The attributes of the specified SRQ.
*/
int ib_query_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr);
/**
* ib_destroy_srq - Destroys the specified SRQ.
* @srq: The SRQ to destroy.
*/
int ib_destroy_srq(struct ib_srq *srq);
/**
* ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
* @srq: The SRQ to post the work request on.
* @recv_wr: A list of work requests to post on the receive queue.
* @bad_recv_wr: On an immediate failure, this parameter will reference
* the work request that failed to be posted on the QP.
*/
static inline int ib_post_srq_recv(struct ib_srq *srq,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr)
{
return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr);
}
/**
* ib_create_qp - Creates a QP associated with the specified protection
* domain.
* @pd: The protection domain associated with the QP.
* @qp_init_attr: A list of initial attributes required to create the
* QP. If QP creation succeeds, then the attributes are updated to
* the actual capabilities of the created QP.
*/
struct ib_qp *ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr);
/**
* ib_modify_qp - Modifies the attributes for the specified QP and then
* transitions the QP to the given state.
* @qp: The QP to modify.
* @qp_attr: On input, specifies the QP attributes to modify. On output,
* the current values of selected QP attributes are returned.
* @qp_attr_mask: A bit-mask used to specify which attributes of the QP
* are being modified.
*/
int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask);
/**
* ib_query_qp - Returns the attribute list and current values for the
* specified QP.
* @qp: The QP to query.
* @qp_attr: The attributes of the specified QP.
* @qp_attr_mask: A bit-mask used to select specific attributes to query.
* @qp_init_attr: Additional attributes of the selected QP.
*
* The qp_attr_mask may be used to limit the query to gathering only the
* selected attributes.
*/
int ib_query_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
/**
* ib_destroy_qp - Destroys the specified QP.
* @qp: The QP to destroy.
*/
int ib_destroy_qp(struct ib_qp *qp);
/**
* ib_open_qp - Obtain a reference to an existing sharable QP.
* @xrcd - XRC domain
* @qp_open_attr: Attributes identifying the QP to open.
*
* Returns a reference to a sharable QP.
*/
struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
struct ib_qp_open_attr *qp_open_attr);
/**
* ib_close_qp - Release an external reference to a QP.
* @qp: The QP handle to release
*
* The opened QP handle is released by the caller. The underlying
* shared QP is not destroyed until all internal references are released.
*/
int ib_close_qp(struct ib_qp *qp);
/**
* ib_post_send - Posts a list of work requests to the send queue of
* the specified QP.
* @qp: The QP to post the work request on.
* @send_wr: A list of work requests to post on the send queue.
* @bad_send_wr: On an immediate failure, this parameter will reference
* the work request that failed to be posted on the QP.
*
* While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
* error is returned, the QP state shall not be affected,
* ib_post_send() will return an immediate error after queueing any
* earlier work requests in the list.
*/
static inline int ib_post_send(struct ib_qp *qp,
struct ib_send_wr *send_wr,
struct ib_send_wr **bad_send_wr)
{
return qp->device->post_send(qp, send_wr, bad_send_wr);
}
/**
* ib_post_recv - Posts a list of work requests to the receive queue of
* the specified QP.
* @qp: The QP to post the work request on.
* @recv_wr: A list of work requests to post on the receive queue.
* @bad_recv_wr: On an immediate failure, this parameter will reference
* the work request that failed to be posted on the QP.
*/
static inline int ib_post_recv(struct ib_qp *qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr)
{
return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
}
/**
* ib_create_cq - Creates a CQ on the specified device.
* @device: The device on which to create the CQ.
* @comp_handler: A user-specified callback that is invoked when a
* completion event occurs on the CQ.
* @event_handler: A user-specified callback that is invoked when an
* asynchronous event not associated with a completion occurs on the CQ.
* @cq_context: Context associated with the CQ returned to the user via
* the associated completion and event handlers.
* @cqe: The minimum size of the CQ.
* @comp_vector - Completion vector used to signal completion events.
* Must be >= 0 and < context->num_comp_vectors.
*
* Users can examine the cq structure to determine the actual CQ size.
*/
struct ib_cq *ib_create_cq(struct ib_device *device,
ib_comp_handler comp_handler,
void (*event_handler)(struct ib_event *, void *),
void *cq_context, int cqe, int comp_vector);
/**
* ib_resize_cq - Modifies the capacity of the CQ.
* @cq: The CQ to resize.
* @cqe: The minimum size of the CQ.
*
* Users can examine the cq structure to determine the actual CQ size.
*/
int ib_resize_cq(struct ib_cq *cq, int cqe);
/**
* ib_modify_cq - Modifies the attributes for the specified CQ and then
* transitions the CQ to the given state.
* @cq: The CQ to modify.
* @cq_attr: specifies the CQ attributes to modify.
* @cq_attr_mask: A bit-mask used to specify which attributes of the CQ
* are being modified.
*/
int ib_modify_cq(struct ib_cq *cq,
struct ib_cq_attr *cq_attr,
int cq_attr_mask);
/**
* ib_destroy_cq - Destroys the specified CQ.
* @cq: The CQ to destroy.
*/
int ib_destroy_cq(struct ib_cq *cq);
/**
* ib_poll_cq - poll a CQ for completion(s)
* @cq:the CQ being polled
* @num_entries:maximum number of completions to return
* @wc:array of at least @num_entries &struct ib_wc where completions
* will be returned
*
* Poll a CQ for (possibly multiple) completions. If the return value
* is < 0, an error occurred. If the return value is >= 0, it is the
* number of completions returned. If the return value is
* non-negative and < num_entries, then the CQ was emptied.
*/
static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
struct ib_wc *wc)
{
return cq->device->poll_cq(cq, num_entries, wc);
}
/**
* ib_peek_cq - Returns the number of unreaped completions currently
* on the specified CQ.
* @cq: The CQ to peek.
* @wc_cnt: A minimum number of unreaped completions to check for.
*
* If the number of unreaped completions is greater than or equal to wc_cnt,
* this function returns wc_cnt, otherwise, it returns the actual number of
* unreaped completions.
*/
int ib_peek_cq(struct ib_cq *cq, int wc_cnt);
/**
* ib_req_notify_cq - Request completion notification on a CQ.
* @cq: The CQ to generate an event for.
* @flags:
* Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
* to request an event on the next solicited event or next work
* completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
* may also be |ed in to request a hint about missed events, as
* described below.
*
* Return Value:
* < 0 means an error occurred while requesting notification
* == 0 means notification was requested successfully, and if
* IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
* were missed and it is safe to wait for another event. In
* this case is it guaranteed that any work completions added
* to the CQ since the last CQ poll will trigger a completion
* notification event.
* > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
* in. It means that the consumer must poll the CQ again to
* make sure it is empty to avoid missing an event because of a
* race between requesting notification and an entry being
* added to the CQ. This return value means it is possible
* (but not guaranteed) that a work completion has been added
* to the CQ since the last poll without triggering a
* completion notification event.
*/
static inline int ib_req_notify_cq(struct ib_cq *cq,
enum ib_cq_notify_flags flags)
{
return cq->device->req_notify_cq(cq, flags);
}
/**
* ib_req_ncomp_notif - Request completion notification when there are
* at least the specified number of unreaped completions on the CQ.
* @cq: The CQ to generate an event for.
* @wc_cnt: The number of unreaped completions that should be on the
* CQ before an event is generated.
*/
static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
{
return cq->device->req_ncomp_notif ?
cq->device->req_ncomp_notif(cq, wc_cnt) :
-ENOSYS;
}
/**
* ib_get_dma_mr - Returns a memory region for system memory that is
* usable for DMA.
* @pd: The protection domain associated with the memory region.
* @mr_access_flags: Specifies the memory access rights.
*
* Note that the ib_dma_*() functions defined below must be used
* to create/destroy addresses used with the Lkey or Rkey returned
* by ib_get_dma_mr().
*/
struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
/**
* ib_dma_mapping_error - check a DMA addr for error
* @dev: The device for which the dma_addr was created
* @dma_addr: The DMA address to check
*/
static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
{
if (dev->dma_ops)
return dev->dma_ops->mapping_error(dev, dma_addr);
return dma_mapping_error(dev->dma_device, dma_addr);
}
/**
* ib_dma_map_single - Map a kernel virtual address to DMA address
* @dev: The device for which the dma_addr is to be created
* @cpu_addr: The kernel virtual address
* @size: The size of the region in bytes
* @direction: The direction of the DMA
*/
static inline u64 ib_dma_map_single(struct ib_device *dev,
void *cpu_addr, size_t size,
enum dma_data_direction direction)
{
if (dev->dma_ops)
return dev->dma_ops->map_single(dev, cpu_addr, size, direction);
return dma_map_single(dev->dma_device, cpu_addr, size, direction);
}
/**
* ib_dma_unmap_single - Destroy a mapping created by ib_dma_map_single()
* @dev: The device for which the DMA address was created
* @addr: The DMA address
* @size: The size of the region in bytes
* @direction: The direction of the DMA
*/
static inline void ib_dma_unmap_single(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
if (dev->dma_ops)
dev->dma_ops->unmap_single(dev, addr, size, direction);
else
dma_unmap_single(dev->dma_device, addr, size, direction);
}
static inline u64 ib_dma_map_single_attrs(struct ib_device *dev,
void *cpu_addr, size_t size,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
return dma_map_single_attrs(dev->dma_device, cpu_addr, size,
direction, attrs);
}
static inline void ib_dma_unmap_single_attrs(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
return dma_unmap_single_attrs(dev->dma_device, addr, size,
direction, attrs);
}
/**
* ib_dma_map_page - Map a physical page to DMA address
* @dev: The device for which the dma_addr is to be created
* @page: The page to be mapped
* @offset: The offset within the page
* @size: The size of the region in bytes
* @direction: The direction of the DMA
*/
static inline u64 ib_dma_map_page(struct ib_device *dev,
struct page *page,
unsigned long offset,
size_t size,
enum dma_data_direction direction)
{
if (dev->dma_ops)
return dev->dma_ops->map_page(dev, page, offset, size, direction);
return dma_map_page(dev->dma_device, page, offset, size, direction);
}
/**
* ib_dma_unmap_page - Destroy a mapping created by ib_dma_map_page()
* @dev: The device for which the DMA address was created
* @addr: The DMA address
* @size: The size of the region in bytes
* @direction: The direction of the DMA
*/
static inline void ib_dma_unmap_page(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
if (dev->dma_ops)
dev->dma_ops->unmap_page(dev, addr, size, direction);
else
dma_unmap_page(dev->dma_device, addr, size, direction);
}
/**
* ib_dma_map_sg - Map a scatter/gather list to DMA addresses
* @dev: The device for which the DMA addresses are to be created
* @sg: The array of scatter/gather entries
* @nents: The number of scatter/gather entries
* @direction: The direction of the DMA
*/
static inline int ib_dma_map_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
if (dev->dma_ops)
return dev->dma_ops->map_sg(dev, sg, nents, direction);
return dma_map_sg(dev->dma_device, sg, nents, direction);
}
/**
* ib_dma_unmap_sg - Unmap a scatter/gather list of DMA addresses
* @dev: The device for which the DMA addresses were created
* @sg: The array of scatter/gather entries
* @nents: The number of scatter/gather entries
* @direction: The direction of the DMA
*/
static inline void ib_dma_unmap_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
if (dev->dma_ops)
dev->dma_ops->unmap_sg(dev, sg, nents, direction);
else
dma_unmap_sg(dev->dma_device, sg, nents, direction);
}
static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, attrs);
}
static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, attrs);
}
/**
* ib_sg_dma_address - Return the DMA address from a scatter/gather entry
* @dev: The device for which the DMA addresses were created
* @sg: The scatter/gather entry
*/
static inline u64 ib_sg_dma_address(struct ib_device *dev,
struct scatterlist *sg)
{
if (dev->dma_ops)
return dev->dma_ops->dma_address(dev, sg);
return sg_dma_address(sg);
}
/**
* ib_sg_dma_len - Return the DMA length from a scatter/gather entry
* @dev: The device for which the DMA addresses were created
* @sg: The scatter/gather entry
*/
static inline unsigned int ib_sg_dma_len(struct ib_device *dev,
struct scatterlist *sg)
{
if (dev->dma_ops)
return dev->dma_ops->dma_len(dev, sg);
return sg_dma_len(sg);
}
/**
* ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU
* @dev: The device for which the DMA address was created
* @addr: The DMA address
* @size: The size of the region in bytes
* @dir: The direction of the DMA
*/
static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
u64 addr,
size_t size,
enum dma_data_direction dir)
{
if (dev->dma_ops)
dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir);
else
dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
}
/**
* ib_dma_sync_single_for_device - Prepare DMA region to be accessed by device
* @dev: The device for which the DMA address was created
* @addr: The DMA address
* @size: The size of the region in bytes
* @dir: The direction of the DMA
*/
static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
u64 addr,
size_t size,
enum dma_data_direction dir)
{
if (dev->dma_ops)
dev->dma_ops->sync_single_for_device(dev, addr, size, dir);
else
dma_sync_single_for_device(dev->dma_device, addr, size, dir);
}
/**
* ib_dma_alloc_coherent - Allocate memory and map it for DMA
* @dev: The device for which the DMA address is requested
* @size: The size of the region to allocate in bytes
* @dma_handle: A pointer for returning the DMA address of the region
* @flag: memory allocator flags
*/
static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
size_t size,
u64 *dma_handle,
gfp_t flag)
{
if (dev->dma_ops)
return dev->dma_ops->alloc_coherent(dev, size, dma_handle, flag);
else {
dma_addr_t handle;
void *ret;
ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag);
*dma_handle = handle;
return ret;
}
}
/**
* ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent()
* @dev: The device for which the DMA addresses were allocated
* @size: The size of the region
* @cpu_addr: the address returned by ib_dma_alloc_coherent()
* @dma_handle: the DMA address returned by ib_dma_alloc_coherent()
*/
static inline void ib_dma_free_coherent(struct ib_device *dev,
size_t size, void *cpu_addr,
u64 dma_handle)
{
if (dev->dma_ops)
dev->dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
else
dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
}
/**
* ib_reg_phys_mr - Prepares a virtually addressed memory region for use
* by an HCA.
* @pd: The protection domain associated assigned to the registered region.
* @phys_buf_array: Specifies a list of physical buffers to use in the
* memory region.
* @num_phys_buf: Specifies the size of the phys_buf_array.
* @mr_access_flags: Specifies the memory access rights.
* @iova_start: The offset of the region's starting I/O virtual address.
*/
struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *phys_buf_array,
int num_phys_buf,
int mr_access_flags,
u64 *iova_start);
/**
* ib_rereg_phys_mr - Modifies the attributes of an existing memory region.
* Conceptually, this call performs the functions deregister memory region
* followed by register physical memory region. Where possible,
* resources are reused instead of deallocated and reallocated.
* @mr: The memory region to modify.
* @mr_rereg_mask: A bit-mask used to indicate which of the following
* properties of the memory region are being modified.
* @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies
* the new protection domain to associated with the memory region,
* otherwise, this parameter is ignored.
* @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
* field specifies a list of physical buffers to use in the new
* translation, otherwise, this parameter is ignored.
* @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
* field specifies the size of the phys_buf_array, otherwise, this
* parameter is ignored.
* @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this
* field specifies the new memory access rights, otherwise, this
* parameter is ignored.
* @iova_start: The offset of the region's starting I/O virtual address.
*/
int ib_rereg_phys_mr(struct ib_mr *mr,
int mr_rereg_mask,
struct ib_pd *pd,
struct ib_phys_buf *phys_buf_array,
int num_phys_buf,
int mr_access_flags,
u64 *iova_start);
/**
* ib_query_mr - Retrieves information about a specific memory region.
* @mr: The memory region to retrieve information about.
* @mr_attr: The attributes of the specified memory region.
*/
int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
/**
* ib_dereg_mr - Deregisters a memory region and removes it from the
* HCA translation table.
* @mr: The memory region to deregister.
*
* This function can fail, if the memory region has memory windows bound to it.
*/
int ib_dereg_mr(struct ib_mr *mr);
/**
* ib_create_mr - Allocates a memory region that may be used for
* signature handover operations.
* @pd: The protection domain associated with the region.
* @mr_init_attr: memory region init attributes.
*/
struct ib_mr *ib_create_mr(struct ib_pd *pd,
struct ib_mr_init_attr *mr_init_attr);
/**
* ib_destroy_mr - Destroys a memory region that was created using
* ib_create_mr and removes it from HW translation tables.
* @mr: The memory region to destroy.
*
* This function can fail, if the memory region has memory windows bound to it.
*/
int ib_destroy_mr(struct ib_mr *mr);
/**
* ib_alloc_fast_reg_mr - Allocates memory region usable with the
* IB_WR_FAST_REG_MR send work request.
* @pd: The protection domain associated with the region.
* @max_page_list_len: requested max physical buffer list length to be
* used with fast register work requests for this MR.
*/
struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
/**
* ib_alloc_fast_reg_page_list - Allocates a page list array
* @device - ib device pointer.
* @page_list_len - size of the page list array to be allocated.
*
* This allocates and returns a struct ib_fast_reg_page_list * and a
* page_list array that is at least page_list_len in size. The actual
* size is returned in max_page_list_len. The caller is responsible
* for initializing the contents of the page_list array before posting
* a send work request with the IB_WC_FAST_REG_MR opcode.
*
* The page_list array entries must be translated using one of the
* ib_dma_*() functions just like the addresses passed to
* ib_map_phys_fmr(). Once the ib_post_send() is issued, the struct
* ib_fast_reg_page_list must not be modified by the caller until the
* IB_WC_FAST_REG_MR work request completes.
*/
struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
struct ib_device *device, int page_list_len);
/**
* ib_free_fast_reg_page_list - Deallocates a previously allocated
* page list array.
* @page_list - struct ib_fast_reg_page_list pointer to be deallocated.
*/
void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
/**
* ib_update_fast_reg_key - updates the key portion of the fast_reg MR
* R_Key and L_Key.
* @mr - struct ib_mr pointer to be updated.
* @newkey - new key to be used.
*/
static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
{
mr->lkey = (mr->lkey & 0xffffff00) | newkey;
mr->rkey = (mr->rkey & 0xffffff00) | newkey;
}
/**
* ib_inc_rkey - increments the key portion of the given rkey. Can be used
* for calculating a new rkey for type 2 memory windows.
* @rkey - the rkey to increment.
*/
static inline u32 ib_inc_rkey(u32 rkey)
{
const u32 mask = 0x000000ff;
return ((rkey + 1) & mask) | (rkey & ~mask);
}
/**
* ib_alloc_mw - Allocates a memory window.
* @pd: The protection domain associated with the memory window.
* @type: The type of the memory window (1 or 2).
*/
struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
/**
* ib_bind_mw - Posts a work request to the send queue of the specified
* QP, which binds the memory window to the given address range and
* remote access attributes.
* @qp: QP to post the bind work request on.
* @mw: The memory window to bind.
* @mw_bind: Specifies information about the memory window, including
* its address range, remote access rights, and associated memory region.
*
* If there is no immediate error, the function will update the rkey member
* of the mw parameter to its new value. The bind operation can still fail
* asynchronously.
*/
static inline int ib_bind_mw(struct ib_qp *qp,
struct ib_mw *mw,
struct ib_mw_bind *mw_bind)
{
/* XXX reference counting in corresponding MR? */
return mw->device->bind_mw ?
mw->device->bind_mw(qp, mw, mw_bind) :
-ENOSYS;
}
/**
* ib_dealloc_mw - Deallocates a memory window.
* @mw: The memory window to deallocate.
*/
int ib_dealloc_mw(struct ib_mw *mw);
/**
* ib_alloc_fmr - Allocates a unmapped fast memory region.
* @pd: The protection domain associated with the unmapped region.
* @mr_access_flags: Specifies the memory access rights.
* @fmr_attr: Attributes of the unmapped region.
*
* A fast memory region must be mapped before it can be used as part of
* a work request.
*/
struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
int mr_access_flags,
struct ib_fmr_attr *fmr_attr);
/**
* ib_map_phys_fmr - Maps a list of physical pages to a fast memory region.
* @fmr: The fast memory region to associate with the pages.
* @page_list: An array of physical pages to map to the fast memory region.
* @list_len: The number of pages in page_list.
* @iova: The I/O virtual address to use with the mapped region.
*/
static inline int ib_map_phys_fmr(struct ib_fmr *fmr,
u64 *page_list, int list_len,
u64 iova)
{
return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova);
}
/**
* ib_unmap_fmr - Removes the mapping from a list of fast memory regions.
* @fmr_list: A linked list of fast memory regions to unmap.
*/
int ib_unmap_fmr(struct list_head *fmr_list);
/**
* ib_dealloc_fmr - Deallocates a fast memory region.
* @fmr: The fast memory region to deallocate.
*/
int ib_dealloc_fmr(struct ib_fmr *fmr);
/**
* ib_attach_mcast - Attaches the specified QP to a multicast group.
* @qp: QP to attach to the multicast group. The QP must be type
* IB_QPT_UD.
* @gid: Multicast group GID.
* @lid: Multicast group LID in host byte order.
*
* In order to send and receive multicast packets, subnet
* administration must have created the multicast group and configured
* the fabric appropriately. The port associated with the specified
* QP must also be a member of the multicast group.
*/
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
/**
* ib_detach_mcast - Detaches the specified QP from a multicast group.
* @qp: QP to detach from the multicast group.
* @gid: Multicast group GID.
* @lid: Multicast group LID in host byte order.
*/
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
/**
* ib_alloc_xrcd - Allocates an XRC domain.
* @device: The device on which to allocate the XRC domain.
*/
struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
/**
* ib_dealloc_xrcd - Deallocates an XRC domain.
* @xrcd: The XRC domain to deallocate.
*/
int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
struct ib_flow *ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr, int domain);
int ib_destroy_flow(struct ib_flow *flow_id);
struct ib_dct *ib_create_dct(struct ib_pd *pd, struct ib_dct_init_attr *attr,
struct ib_udata *udata);
int ib_destroy_dct(struct ib_dct *dct);
int ib_query_dct(struct ib_dct *dct, struct ib_dct_attr *attr);
int ib_query_values(struct ib_device *device,
int q_values, struct ib_device_values *values);
static inline void ib_active_speed_enum_to_rate(u8 active_speed,
int *rate,
char **speed)
{
switch (active_speed) {
case IB_SPEED_DDR:
*speed = " DDR";
*rate = 50;
break;
case IB_SPEED_QDR:
*speed = " QDR";
*rate = 100;
break;
case IB_SPEED_FDR10:
*speed = " FDR10";
*rate = 100;
break;
case IB_SPEED_FDR:
*speed = " FDR";
*rate = 140;
break;
case IB_SPEED_EDR:
*speed = " EDR";
*rate = 250;
break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
*rate = 25;
break;
}
}
static inline int ib_check_mr_access(int flags)
{
/*
* Local write permission is required if remote write or
* remote atomic permission is also requested.
*/
if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
!(flags & IB_ACCESS_LOCAL_WRITE))
return -EINVAL;
return 0;
}
/**
* ib_check_mr_status: lightweight check of MR status.
* This routine may provide status checks on a selected
* ib_mr. first use is for signature status check.
*
* @mr: A memory region.
* @check_mask: Bitmask of which checks to perform from
* ib_mr_status_check enumeration.
* @mr_status: The container of relevant status checks.
* failed checks will be indicated in the status bitmask
* and the relevant info shall be in the error item.
*/
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
#endif /* IB_VERBS_H */

File Metadata

Mime Type
text/x-diff
Expires
Wed, Nov 12, 11:47 PM (1 d, 22 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
25184900
Default Alt Text
(523 KB)

Event Timeline