diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -4628,6 +4628,8 @@ compile-with "${OFED_C}" ofed/drivers/infiniband/core/ib_cma.c optional ofed \ compile-with "${OFED_C}" +ofed/drivers/infiniband/core/ib_core_uverbs.c optional ofed \ + compile-with "${OFED_C}" ofed/drivers/infiniband/core/ib_cq.c optional ofed \ compile-with "${OFED_C}" ofed/drivers/infiniband/core/ib_device.c optional ofed \ @@ -4648,6 +4650,8 @@ compile-with "${OFED_C}" ofed/drivers/infiniband/core/ib_packer.c optional ofed \ compile-with "${OFED_C}" +ofed/drivers/infiniband/core/ib_rdma_core.c optional ofed \ + compile-with "${OFED_C}" ofed/drivers/infiniband/core/ib_roce_gid_mgmt.c optional ofed \ compile-with "${OFED_C}" ofed/drivers/infiniband/core/ib_sa_query.c optional ofed \ @@ -4668,10 +4672,30 @@ compile-with "${OFED_C}" ofed/drivers/infiniband/core/ib_uverbs_cmd.c optional ofed \ compile-with "${OFED_C}" +ofed/drivers/infiniband/core/ib_uverbs_ioctl.c optional ofed \ + compile-with "${OFED_C}" ofed/drivers/infiniband/core/ib_uverbs_main.c optional ofed \ compile-with "${OFED_C}" ofed/drivers/infiniband/core/ib_uverbs_marshall.c optional ofed \ compile-with "${OFED_C}" +ofed/drivers/infiniband/core/ib_uverbs_std_types.c optional ofed \ + compile-with "${OFED_C}" +ofed/drivers/infiniband/core/ib_uverbs_std_types_async_fd.c optional ofed \ + compile-with "${OFED_C}" +ofed/drivers/infiniband/core/ib_uverbs_std_types_counters.c optional ofed \ + compile-with "${OFED_C}" +ofed/drivers/infiniband/core/ib_uverbs_std_types_cq.c optional ofed \ + compile-with "${OFED_C}" +ofed/drivers/infiniband/core/ib_uverbs_std_types_device.c optional ofed \ + compile-with "${OFED_C}" +ofed/drivers/infiniband/core/ib_uverbs_std_types_dm.c optional ofed \ + compile-with "${OFED_C}" +ofed/drivers/infiniband/core/ib_uverbs_std_types_flow_action.c optional ofed \ + compile-with "${OFED_C}" +ofed/drivers/infiniband/core/ib_uverbs_std_types_mr.c optional ofed \ + compile-with "${OFED_C}" +ofed/drivers/infiniband/core/ib_uverbs_uapi.c optional ofed \ + compile-with "${OFED_C}" ofed/drivers/infiniband/core/ib_verbs.c optional ofed \ compile-with "${OFED_C}" @@ -4827,6 +4851,8 @@ compile-with "${OFED_C}" dev/mlx5/mlx5_ib/mlx5_ib_cq.c optional mlx5ib pci ofed \ compile-with "${OFED_C}" +dev/mlx5/mlx5_ib/mlx5_ib_devx.c optional mlx5ib pci ofed \ + compile-with "${OFED_C}" dev/mlx5/mlx5_ib/mlx5_ib_doorbell.c optional mlx5ib pci ofed \ compile-with "${OFED_C}" dev/mlx5/mlx5_ib/mlx5_ib_gsi.c optional mlx5ib pci ofed \ diff --git a/sys/dev/cxgbe/iw_cxgbe/cq.c b/sys/dev/cxgbe/iw_cxgbe/cq.c --- a/sys/dev/cxgbe/iw_cxgbe/cq.c +++ b/sys/dev/cxgbe/iw_cxgbe/cq.c @@ -883,7 +883,7 @@ return !err || err == -ENODATA ? npolled : err; } -int c4iw_destroy_cq(struct ib_cq *ib_cq) +void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct c4iw_cq *chp; struct c4iw_ucontext *ucontext; @@ -895,22 +895,20 @@ atomic_dec(&chp->refcnt); wait_event(chp->wait, !atomic_read(&chp->refcnt)); - ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context) - : NULL; + ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, + ibucontext); destroy_cq(&chp->rhp->rdev, &chp->cq, ucontext ? 
&ucontext->uctx : &chp->cq.rdev->uctx); - kfree(chp); - return 0; } -struct ib_cq * -c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_context, struct ib_udata *udata) +int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; int vector = attr->comp_vector; struct c4iw_dev *rhp; - struct c4iw_cq *chp; + struct c4iw_cq *chp = to_c4iw_cq(ibcq); struct c4iw_create_cq_resp uresp; struct c4iw_ucontext *ucontext = NULL; int ret; @@ -919,17 +917,12 @@ CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries); if (attr->flags) - return ERR_PTR(-EINVAL); + return -EINVAL; rhp = to_c4iw_dev(ibdev); - chp = kzalloc(sizeof(*chp), GFP_KERNEL); - if (!chp) - return ERR_PTR(-ENOMEM); - - - if (ib_context) - ucontext = to_c4iw_ucontext(ib_context); + ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, + ibucontext); /* account for the status page. */ entries++; @@ -1020,7 +1013,7 @@ "%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx", __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize, (unsigned long long) chp->cq.dma_addr); - return &chp->ibcq; + return 0; err5: kfree(mm2); err4: @@ -1031,8 +1024,7 @@ destroy_cq(&chp->rhp->rdev, &chp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); err1: - kfree(chp); - return ERR_PTR(ret); + return ret; } int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) diff --git a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h --- a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h +++ b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h @@ -55,6 +55,7 @@ #include #include +#include #include "common/common.h" #include "common/t4_msg.h" @@ -474,6 +475,14 @@ u8 send_term; }; +struct c4iw_ib_srq { + struct ib_srq ibsrq; +}; + +struct c4iw_ib_ah { + struct ib_ah ibah; +}; + struct c4iw_qp { struct ib_qp ibqp; struct c4iw_dev *rhp; @@ -501,7 +510,6 @@ u32 key; spinlock_t mmap_lock; struct list_head mmaps; - struct kref kref; }; static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) @@ -509,17 +517,6 @@ return container_of(c, struct c4iw_ucontext, ibucontext); } -void _c4iw_free_ucontext(struct kref *kref); - -static inline void c4iw_put_ucontext(struct c4iw_ucontext *ucontext) -{ - kref_put(&ucontext->kref, _c4iw_free_ucontext); -} -static inline void c4iw_get_ucontext(struct c4iw_ucontext *ucontext) -{ - kref_get(&ucontext->kref); -} - struct c4iw_mm_entry { struct list_head entry; u64 addr; @@ -938,7 +935,7 @@ void c4iw_qp_add_ref(struct ib_qp *qp); void c4iw_qp_rem_ref(struct ib_qp *qp); struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg); + u32 max_num_sg, struct ib_udata *udata); int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int c4iw_dealloc_mw(struct ib_mw *mw); @@ -947,16 +944,15 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); -int c4iw_dereg_mr(struct ib_mr *ib_mr); +int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); -int c4iw_destroy_cq(struct ib_cq *ib_cq); -struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_context, - struct ib_udata *udata); +void c4iw_destroy_cq(struct ib_cq *ib_cq, 
struct ib_udata *udata); +int c4iw_create_cq(struct ib_cq *ibcq, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata); int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); -int c4iw_destroy_qp(struct ib_qp *ib_qp); +int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata); struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct ib_udata *udata); diff --git a/sys/dev/cxgbe/iw_cxgbe/mem.c b/sys/dev/cxgbe/iw_cxgbe/mem.c --- a/sys/dev/cxgbe/iw_cxgbe/mem.c +++ b/sys/dev/cxgbe/iw_cxgbe/mem.c @@ -610,7 +610,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg) + u32 max_num_sg, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -700,7 +700,7 @@ } -int c4iw_dereg_mr(struct ib_mr *ib_mr) +int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_mr *mhp; diff --git a/sys/dev/cxgbe/iw_cxgbe/provider.c b/sys/dev/cxgbe/iw_cxgbe/provider.c --- a/sys/dev/cxgbe/iw_cxgbe/provider.c +++ b/sys/dev/cxgbe/iw_cxgbe/provider.c @@ -58,16 +58,15 @@ return -ENOSYS; } -static struct ib_ah *c4iw_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr, - struct ib_udata *udata) +static int c4iw_ah_create(struct ib_ah *ah, + struct ib_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata) { - return ERR_PTR(-ENOSYS); + return -ENOSYS; } -static int c4iw_ah_destroy(struct ib_ah *ah) +static void c4iw_ah_destroy(struct ib_ah *ah, u32 flags) { - return -ENOSYS; } static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) @@ -93,35 +92,27 @@ return -ENOSYS; } -void _c4iw_free_ucontext(struct kref *kref) +static void c4iw_dealloc_ucontext(struct ib_ucontext *context) { - struct c4iw_ucontext *ucontext; + struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); struct c4iw_dev *rhp; struct c4iw_mm_entry *mm, *tmp; - ucontext = container_of(kref, struct c4iw_ucontext, kref); + pr_debug("context %p\n", context); rhp = to_c4iw_dev(ucontext->ibucontext.device); CTR2(KTR_IW_CXGBE, "%s ucontext %p", __func__, ucontext); + list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) kfree(mm); c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx); - kfree(ucontext); } -static int c4iw_dealloc_ucontext(struct ib_ucontext *context) +static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext, + struct ib_udata *udata) { - struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); - - CTR2(KTR_IW_CXGBE, "%s context %p", __func__, context); - c4iw_put_ucontext(ucontext); - return 0; -} - -static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) -{ - struct c4iw_ucontext *context; + struct ib_device *ibdev = ucontext->device; + struct c4iw_ucontext *context = to_c4iw_ucontext(ucontext); struct c4iw_dev *rhp = to_c4iw_dev(ibdev); static int warned; struct c4iw_alloc_ucontext_resp uresp; @@ -129,16 +120,9 @@ struct c4iw_mm_entry *mm = NULL; PDBG("%s ibdev %p\n", __func__, ibdev); - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) { - ret = -ENOMEM; - goto err; - } - c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx); INIT_LIST_HEAD(&context->mmaps); spin_lock_init(&context->mmap_lock); - kref_init(&context->kref); if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) { if (!warned++) @@ -150,7 +134,7 @@ mm = kmalloc(sizeof *mm, GFP_KERNEL); if (!mm) - goto err_free; + goto err; uresp.status_page_size = PAGE_SIZE; @@ 
-169,13 +153,11 @@ mm->len = PAGE_SIZE; insert_mmap(context, mm); } - return &context->ibucontext; + return 0; err_mm: kfree(mm); -err_free: - kfree(context); err: - return ERR_PTR(ret); + return ret; } static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) @@ -226,8 +208,8 @@ return ret; } -static int -c4iw_deallocate_pd(struct ib_pd *pd) +static void +c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) { struct c4iw_pd *php = to_c4iw_pd(pd); struct c4iw_dev *rhp = php->rhp; @@ -238,36 +220,29 @@ mutex_lock(&rhp->rdev.stats.lock); rhp->rdev.stats.pd.cur--; mutex_unlock(&rhp->rdev.stats.lock); - kfree(php); - - return (0); } -static struct ib_pd * -c4iw_allocate_pd(struct ib_device *ibdev, struct ib_ucontext *context, - struct ib_udata *udata) +static int +c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) { - struct c4iw_pd *php; + struct c4iw_pd *php = to_c4iw_pd(pd); + struct ib_device *ibdev = pd->device; u32 pdid; struct c4iw_dev *rhp; - CTR4(KTR_IW_CXGBE, "%s: ibdev %p, context %p, data %p", __func__, ibdev, - context, udata); + CTR4(KTR_IW_CXGBE, "%s: ibdev %p, pd %p, data %p", __func__, ibdev, + pd, udata); rhp = (struct c4iw_dev *) ibdev; pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table); if (!pdid) - return ERR_PTR(-EINVAL); - php = kzalloc(sizeof(*php), GFP_KERNEL); - if (!php) { - c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid); - return ERR_PTR(-ENOMEM); - } + return -EINVAL; + php->pdid = pdid; php->rhp = rhp; - if (context) { + if (udata) { if (ib_copy_to_udata(udata, &php->pdid, sizeof(u32))) { - c4iw_deallocate_pd(&php->ibpd); - return ERR_PTR(-EFAULT); + c4iw_deallocate_pd(&php->ibpd, udata); + return -EFAULT; } } mutex_lock(&rhp->rdev.stats.lock); @@ -276,10 +251,10 @@ rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur; mutex_unlock(&rhp->rdev.stats.lock); - CTR6(KTR_IW_CXGBE, + CTR5(KTR_IW_CXGBE, - "%s: ibdev %p, context %p, data %p, pddid 0x%x, pd %p", __func__, + "%s: ibdev %p, data %p, pddid 0x%x, pd %p", __func__, - ibdev, context, udata, pdid, php); - return (&php->ibpd); + ibdev, udata, pdid, php); + return (0); } static int @@ -436,6 +411,13 @@ ret = linux_pci_attach_device(sc->dev, NULL, NULL, &dev->pdev); if (ret) return (ret); + +#define c4iw_ib_cq c4iw_cq +#define c4iw_ib_pd c4iw_pd +#define c4iw_ib_qp c4iw_qp +#define c4iw_ib_ucontext c4iw_ucontext + INIT_IB_DEVICE_OPS(&ibdev->ops, c4iw, CXGB4); + strlcpy(ibdev->name, device_get_nameunit(sc->dev), sizeof(ibdev->name)); memset(&ibdev->node_guid, 0, sizeof(ibdev->node_guid)); memcpy(&ibdev->node_guid, sc->port[0]->vi[0].hw_addr, ETHER_ADDR_LEN); diff --git a/sys/dev/cxgbe/iw_cxgbe/qp.c b/sys/dev/cxgbe/iw_cxgbe/qp.c --- a/sys/dev/cxgbe/iw_cxgbe/qp.c +++ b/sys/dev/cxgbe/iw_cxgbe/qp.c @@ -583,8 +583,6 @@ destroy_qp(&rhp->rdev, &qhp->wq, ucontext ?
&ucontext->uctx : &rhp->rdev.uctx); - if (ucontext) - c4iw_put_ucontext(ucontext); kfree(qhp); } @@ -1678,7 +1676,7 @@ return ret; } -int c4iw_destroy_qp(struct ib_qp *ib_qp) +int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_qp *qhp; @@ -1879,7 +1877,6 @@ rq_db_key_mm->len); insert_mmap(ucontext, rq_db_key_mm); - c4iw_get_ucontext(ucontext); qhp->ucontext = ucontext; } qhp->ibqp.qp_num = qhp->wq.sq.qid; diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib.h b/sys/dev/mlx4/mlx4_ib/mlx4_ib.h --- a/sys/dev/mlx4/mlx4_ib/mlx4_ib.h +++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib.h @@ -83,16 +83,11 @@ HW_BAR_COUNT }; -struct mlx4_ib_vma_private_data { - struct vm_area_struct *vma; -}; - struct mlx4_ib_ucontext { struct ib_ucontext ibucontext; struct mlx4_uar uar; struct list_head db_page_list; struct mutex db_page_mutex; - struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT]; }; struct mlx4_ib_pd { @@ -726,39 +721,37 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); -int mlx4_ib_dereg_mr(struct ib_mr *mr); +int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx4_ib_dealloc_mw(struct ib_mw *mw); -struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); -struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata); -int mlx4_ib_destroy_cq(struct ib_cq *cq); +int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, - struct ib_udata *udata); +int mlx4_ib_create_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); +int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct ib_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); -int mlx4_ib_destroy_ah(struct ib_ah *ah); +void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags); -struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); +int mlx4_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -int mlx4_ib_destroy_srq(struct ib_srq *srq); +void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); void 
mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); @@ -766,7 +759,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); -int mlx4_ib_destroy_qp(struct ib_qp *qp); +int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_ah.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_ah.c --- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_ah.c +++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_ah.c @@ -42,10 +42,11 @@ #include "mlx4_ib.h" -static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, - struct mlx4_ib_ah *ah) +static int create_ib_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr) { - struct mlx4_dev *dev = to_mdev(pd->device)->dev; + struct ib_pd *pd = ib_ah->pd; + struct mlx4_ib_ah *ah = to_mah(ib_ah); + struct mlx4_dev *dev = to_mdev(ib_ah->device)->dev; ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); ah->av.ib.g_slid = ah_attr->src_path_bits; @@ -67,14 +68,14 @@ !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) --ah->av.ib.stat_rate; } - - return &ah->ibah; + return 0; } -static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, - struct mlx4_ib_ah *ah) +static int create_iboe_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr) { - struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + struct ib_pd *pd = ib_ah->pd; + struct mlx4_ib_dev *ibdev = to_mdev(ib_ah->device); + struct mlx4_ib_ah *ah = to_mah(ib_ah); struct mlx4_dev *dev = ibdev->dev; int is_mcast = 0; struct in6_addr in6; @@ -93,7 +94,7 @@ ret = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid, &gid_attr); if (ret) - return ERR_PTR(ret); + return ret; eth_zero_addr(ah->av.eth.s_mac); if (gid_attr.ndev) { vlan_tag = rdma_vlan_dev_vlan_id(gid_attr.ndev); @@ -105,7 +106,7 @@ ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index); if (ret < 0) - return ERR_PTR(ret); + return ret; ah->av.eth.gid_index = ret; ah->av.eth.vlan = cpu_to_be16(vlan_tag); ah->av.eth.hop_limit = ah_attr->grh.hop_limit; @@ -125,23 +126,15 @@ memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29); - return &ah->ibah; + return 0; } -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, - struct ib_udata *udata) - +int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - struct mlx4_ib_ah *ah; - struct ib_ah *ret; - - ah = kzalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); - - if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) { + if (rdma_port_get_link_layer(ib_ah->pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) { if (!(ah_attr->ah_flags & IB_AH_GRH)) { - ret = ERR_PTR(-EINVAL); + return -EINVAL; } else { /* * TBD: need to handle the case when we get @@ -151,15 +144,35 @@ * local addresses which we can translate * without going to sleep. 
*/ - ret = create_iboe_ah(pd, ah_attr, ah); + return create_iboe_ah(ib_ah, ah_attr); } + } + return create_ib_ah(ib_ah, ah_attr); +} - if (IS_ERR(ret)) - kfree(ah); +int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct ib_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, u16 vlan_tag) +{ + struct ib_ah_attr slave_attr = *ah_attr; + struct mlx4_ib_ah *mah = to_mah(ah); + int ret; + slave_attr.grh.sgid_index = slave_sgid_index; + ret = mlx4_ib_create_ah(ah, &slave_attr, 0, NULL); + if (ret) return ret; - } else - return create_ib_ah(pd, ah_attr, ah); /* never fails */ + + /* get rid of force-loopback bit */ + mah->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); + + if (rdma_port_get_link_layer(ah->pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) + memcpy(mah->av.eth.s_mac, s_mac, 6); + + if (vlan_tag < 0x1000) + vlan_tag |= (ah_attr->sl & 7) << 13; + mah->av.eth.vlan = cpu_to_be16(vlan_tag); + + return 0; } int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) @@ -195,8 +208,7 @@ return 0; } -int mlx4_ib_destroy_ah(struct ib_ah *ah) +void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) { - kfree(to_mah(ah)); - return 0; + return; } diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_cq.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_cq.c --- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_cq.c +++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_cq.c @@ -39,6 +39,7 @@ #include "mlx4_ib.h" #include +#include static void mlx4_ib_cq_comp(struct mlx4_cq *cq) { @@ -135,14 +136,16 @@ mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf); } -static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context, - struct mlx4_ib_cq_buf *buf, struct ib_umem **umem, - u64 buf_addr, int cqe) +static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata, + struct mlx4_ib_cq_buf *buf, + struct ib_umem **umem, u64 buf_addr, int cqe) { int err; int cqe_size = dev->dev->caps.cqe_size; + struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx4_ib_ucontext, ibucontext); - *umem = ib_umem_get(context, buf_addr, cqe * cqe_size, + *umem = ib_umem_get(&context->ibucontext, buf_addr, cqe * cqe_size, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(*umem)) return PTR_ERR(*umem); @@ -168,27 +171,25 @@ } #define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_FLAGS_TIMESTAMP_COMPLETION -struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata) +int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; int vector = attr->comp_vector; struct mlx4_ib_dev *dev = to_mdev(ibdev); - struct mlx4_ib_cq *cq; + struct mlx4_ib_cq *cq = to_mcq(ibcq); struct mlx4_uar *uar; + void *buf_addr; int err; + struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx4_ib_ucontext, ibucontext); if (entries < 1 || entries > dev->dev->caps.max_cqes) - return ERR_PTR(-EINVAL); + return -EINVAL; if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) - return ERR_PTR(-EINVAL); - - cq = kmalloc(sizeof *cq, GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); + return -EINVAL; entries = roundup_pow_of_two(entries + 1); cq->ibcq.cqe = entries - 1; @@ -200,7 +201,7 @@ INIT_LIST_HEAD(&cq->send_qp_list); INIT_LIST_HEAD(&cq->recv_qp_list); - if (context) { + if (udata) { struct mlx4_ib_create_cq ucmd; if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -208,17 +209,17 @@ goto err_cq; } - err = mlx4_ib_get_cq_umem(dev, context, 
&cq->buf, &cq->umem, + buf_addr = (void *)(unsigned long)ucmd.buf_addr; + err = mlx4_ib_get_cq_umem(dev, udata, &cq->buf, &cq->umem, ucmd.buf_addr, entries); if (err) goto err_cq; - err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr, - &cq->db); + err = mlx4_ib_db_map_user(context, ucmd.db_addr, &cq->db); if (err) goto err_mtt; - uar = &to_mucontext(context)->uar; + uar = &context->uar; } else { err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL); if (err) @@ -233,6 +234,8 @@ if (err) goto err_db; + buf_addr = &cq->buf.buf; + uar = &dev->priv_uar; } @@ -248,37 +251,33 @@ cq->mcq.comp = mlx4_ib_cq_comp; cq->mcq.event = mlx4_ib_cq_event; - if (context) + if (udata) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) { err = -EFAULT; goto err_cq_free; } - return &cq->ibcq; + return 0; err_cq_free: mlx4_cq_free(dev->dev, &cq->mcq); err_dbmap: - if (context) - mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db); + if (udata) + mlx4_ib_db_unmap_user(context, &cq->db); err_mtt: mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt); - if (context) - ib_umem_release(cq->umem); - else + ib_umem_release(cq->umem); + if (!udata) mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); err_db: - if (!context) + if (!udata) mlx4_db_free(dev->dev, &cq->db); - err_cq: - kfree(cq); - - return ERR_PTR(err); + return err; } static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq, @@ -321,7 +320,7 @@ if (!cq->resize_buf) return -ENOMEM; - err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf, + err = mlx4_ib_get_cq_umem(dev, udata, &cq->resize_buf->buf, &cq->resize_umem, ucmd.buf_addr, entries); if (err) { kfree(cq->resize_buf); @@ -460,18 +459,15 @@ kfree(cq->resize_buf); cq->resize_buf = NULL; - if (cq->resize_umem) { - ib_umem_release(cq->resize_umem); - cq->resize_umem = NULL; - } - + ib_umem_release(cq->resize_umem); + cq->resize_umem = NULL; out: mutex_unlock(&cq->resize_mutex); return err; } -int mlx4_ib_destroy_cq(struct ib_cq *cq) +void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(cq->device); struct mlx4_ib_cq *mcq = to_mcq(cq); @@ -479,17 +475,18 @@ mlx4_cq_free(dev->dev, &mcq->mcq); mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt); - if (cq->uobject) { - mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db); - ib_umem_release(mcq->umem); + if (udata) { + mlx4_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx4_ib_ucontext, + ibucontext), + &mcq->db); } else { mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe); mlx4_db_free(dev->dev, &mcq->db); } - - kfree(mcq); - - return 0; + ib_umem_release(mcq->umem); } static void dump_cqe(void *cqe) diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_mad.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_mad.c --- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_mad.c +++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_mad.c @@ -199,13 +199,13 @@ ah_attr.port_num = port_num; new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, - &ah_attr); + &ah_attr, 0); if (IS_ERR(new_ah)) return; spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) - ib_destroy_ah(dev->sm_ah[port_num - 1]); + ib_destroy_ah(dev->sm_ah[port_num - 1], 0); dev->sm_ah[port_num - 1] = new_ah; spin_unlock_irqrestore(&dev->sm_lock, flags); } @@ -540,7 +540,7 @@ memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16); attr.ah_flags = IB_AH_GRH; } - ah = ib_create_ah(tun_ctx->pd, &attr); + ah = ib_create_ah(tun_ctx->pd, &attr, 0); if (IS_ERR(ah)) return -ENOMEM; @@ -557,7 +557,7 @@ tun_mad = (struct mlx4_rcv_tunnel_mad *) 
(tun_qp->tx_ring[tun_tx_ix].buf.addr); if (tun_qp->tx_ring[tun_tx_ix].ah) - ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah); + ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah, 0); tun_qp->tx_ring[tun_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, tun_qp->tx_ring[tun_tx_ix].buf.map, @@ -632,7 +632,7 @@ spin_unlock(&tun_qp->tx_lock); tun_qp->tx_ring[tun_tx_ix].ah = NULL; end: - ib_destroy_ah(ah); + ib_destroy_ah(ah, 0); return ret; } @@ -986,7 +986,7 @@ struct ib_mad_send_wc *mad_send_wc) { if (mad_send_wc->send_buf->context[0]) - ib_destroy_ah(mad_send_wc->send_buf->context[0]); + ib_destroy_ah(mad_send_wc->send_buf->context[0], 0); ib_free_send_mad(mad_send_wc->send_buf); } @@ -1041,7 +1041,7 @@ } if (dev->sm_ah[p]) - ib_destroy_ah(dev->sm_ah[p]); + ib_destroy_ah(dev->sm_ah[p], 0); } } @@ -1334,11 +1334,9 @@ struct ib_ah *ah; struct ib_qp *send_qp = NULL; unsigned wire_tx_ix = 0; - int ret = 0; u16 wire_pkey_ix; int src_qpnum; - u8 sgid_index; - + int ret; sqp_ctx = dev->sriov.sqps[port-1]; @@ -1358,16 +1356,20 @@ send_qp = sqp->qp; - /* create ah */ - sgid_index = attr->grh.sgid_index; - attr->grh.sgid_index = 0; - ah = ib_create_ah(sqp_ctx->pd, attr); - if (IS_ERR(ah)) + ah = rdma_zalloc_drv_obj(sqp_ctx->pd->device, ib_ah); + if (!ah) return -ENOMEM; - attr->grh.sgid_index = sgid_index; - to_mah(ah)->av.ib.gid_index = sgid_index; - /* get rid of force-loopback bit */ - to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); + + ah->device = sqp_ctx->pd->device; + ah->pd = sqp_ctx->pd; + + /* create ah */ + ret = mlx4_ib_create_ah_slave(ah, attr, + attr->grh.sgid_index, + s_mac, vlan_id); + if (ret) + goto out; + spin_lock(&sqp->tx_lock); if (sqp->tx_ix_head - sqp->tx_ix_tail >= (MLX4_NUM_TUNNEL_BUFS - 1)) @@ -1379,8 +1381,7 @@ goto out; sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); - if (sqp->tx_ring[wire_tx_ix].ah) - ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah); + kfree(sqp->tx_ring[wire_tx_ix].ah); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, @@ -1409,12 +1410,6 @@ wr.wr.num_sge = 1; wr.wr.opcode = IB_WR_SEND; wr.wr.send_flags = IB_SEND_SIGNALED; - if (s_mac) - memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); - if (vlan_id < 0x1000) - vlan_id |= (attr->sl & 7) << 13; - to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id); - ret = ib_post_send(send_qp, &wr.wr, &bad_wr); if (!ret) @@ -1425,7 +1420,7 @@ spin_unlock(&sqp->tx_lock); sqp->tx_ring[wire_tx_ix].ah = NULL; out: - ib_destroy_ah(ah); + kfree(ah); return ret; } @@ -1684,7 +1679,7 @@ tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); if (tun_qp->tx_ring[i].ah) - ib_destroy_ah(tun_qp->tx_ring[i].ah); + ib_destroy_ah(tun_qp->tx_ring[i].ah, 0); } kfree(tun_qp->tx_ring); kfree(tun_qp->ring); @@ -1717,7 +1712,7 @@ "wrid=0x%llx, status=0x%x\n", (unsigned long long)wc.wr_id, wc.status); ib_destroy_ah(tun_qp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&tun_qp->tx_lock); @@ -1734,7 +1729,7 @@ ctx->slave, wc.status, (unsigned long long)wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { ib_destroy_ah(tun_qp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&tun_qp->tx_lock); @@ -1872,8 +1867,8 @@ if (wc.status == IB_WC_SUCCESS) { switch (wc.opcode) { case IB_WC_SEND: - ib_destroy_ah(sqp->tx_ring[wc.wr_id & - 
(MLX4_NUM_TUNNEL_BUFS - 1)].ah); + kfree(sqp->tx_ring[wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)].ah); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); @@ -1902,8 +1897,8 @@ " status = %d, wrid = 0x%llx\n", ctx->slave, wc.status, (unsigned long long)wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { - ib_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + kfree(sqp->tx_ring[wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)].ah); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c --- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c +++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c @@ -1039,17 +1039,18 @@ return err; } -static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx, + struct ib_udata *udata) { + struct ib_device *ibdev = uctx->device; struct mlx4_ib_dev *dev = to_mdev(ibdev); - struct mlx4_ib_ucontext *context; + struct mlx4_ib_ucontext *context = to_mucontext(uctx); struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3; struct mlx4_ib_alloc_ucontext_resp resp; int err; if (!dev->ib_active) - return ERR_PTR(-EAGAIN); + return -EAGAIN; if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { resp_v3.qp_tab_size = dev->dev->caps.num_qps; @@ -1063,15 +1064,9 @@ resp.cqe_size = dev->dev->caps.cqe_size; } - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); - err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar); - if (err) { - kfree(context); - return ERR_PTR(err); - } + if (err) + return err; INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); @@ -1083,176 +1078,87 @@ if (err) { mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); - kfree(context); - return ERR_PTR(-EFAULT); + return -EFAULT; } - return &context->ibucontext; + return err; } -static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +static void mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar); - kfree(context); - - return 0; -} - -static void mlx4_ib_vma_open(struct vm_area_struct *area) -{ - /* vma_open is called when a new VMA is created on top of our VMA. - * This is done through either mremap flow or split_vma (usually due - * to mlock, madvise, munmap, etc.). We do not support a clone of the - * vma, as this VMA is strongly hardware related. Therefore we set the - * vm_ops of the newly created/cloned VMA to NULL, to prevent it from - * calling us again and trying to do incorrect actions. We assume that - * the original vma size is exactly a single page that there will be no - * "splitting" operations on. - */ - area->vm_ops = NULL; -} - -static void mlx4_ib_vma_close(struct vm_area_struct *area) -{ - struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data; - - /* It's guaranteed that all VMAs opened on a FD are closed before the - * file itself is closed, therefore no sync is needed with the regular - * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync - * with accessing the vma as part of mlx4_ib_disassociate_ucontext. - * The close operation is usually called under mm->mmap_sem except when - * process is exiting. The exiting case is handled explicitly as part - * of mlx4_ib_disassociate_ucontext. 
- */ - mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *) - area->vm_private_data; - - /* set the vma context pointer to null in the mlx4_ib driver's private - * data to protect against a race condition in mlx4_ib_dissassociate_ucontext(). - */ - mlx4_ib_vma_priv_data->vma = NULL; -} - -static const struct vm_operations_struct mlx4_ib_vm_ops = { - .open = mlx4_ib_vma_open, - .close = mlx4_ib_vma_close -}; - -static void mlx4_ib_set_vma_data(struct vm_area_struct *vma, - struct mlx4_ib_vma_private_data *vma_private_data) -{ - vma_private_data->vma = vma; - vma->vm_private_data = vma_private_data; - vma->vm_ops = &mlx4_ib_vm_ops; } static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct mlx4_ib_dev *dev = to_mdev(context->device); - struct mlx4_ib_ucontext *mucontext = to_mucontext(context); - if (vma->vm_end - vma->vm_start != PAGE_SIZE) - return -EINVAL; + switch (vma->vm_pgoff) { + case 0: + return rdma_user_mmap_io(context, vma, + to_mucontext(context)->uar.pfn, + PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot), + NULL); - if (vma->vm_pgoff == 0) { - /* We prevent double mmaping on same context */ - if (mucontext->hw_bar_info[HW_BAR_DB].vma) + case 1: + if (dev->dev->caps.bf_reg_size == 0) return -EINVAL; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (io_remap_pfn_range(vma, vma->vm_start, - to_mucontext(context)->uar.pfn, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - - mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]); - - } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) { - /* We prevent double mmaping on same context */ - if (mucontext->hw_bar_info[HW_BAR_BF].vma) - return -EINVAL; - - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - - if (io_remap_pfn_range(vma, vma->vm_start, - to_mucontext(context)->uar.pfn + - dev->dev->caps.num_uars, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - - mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]); - - } else if (vma->vm_pgoff == 3) { + return rdma_user_mmap_io( + context, vma, + to_mucontext(context)->uar.pfn + + dev->dev->caps.num_uars, + PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot), + NULL); + + case 3: { struct mlx4_clock_params params; int ret; - /* We prevent double mmaping on same context */ - if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma) - return -EINVAL; - ret = mlx4_get_internal_clock_params(dev->dev, ¶ms); - if (ret) return ret; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (io_remap_pfn_range(vma, vma->vm_start, - (pci_resource_start(dev->dev->persist->pdev, - params.bar) + - params.offset) - >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - - mlx4_ib_set_vma_data(vma, - &mucontext->hw_bar_info[HW_BAR_CLOCK]); - } else { - return -EINVAL; + return rdma_user_mmap_io( + context, vma, + (pci_resource_start(dev->dev->persist->pdev, + params.bar) + + params.offset) >> + PAGE_SHIFT, + PAGE_SIZE, pgprot_noncached(vma->vm_page_prot), + NULL); } - return 0; + default: + return -EINVAL; + } } -static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { - struct mlx4_ib_pd *pd; + struct mlx4_ib_pd *pd = to_mpd(ibpd); + struct ib_device *ibdev = ibpd->device; int err; - pd = kmalloc(sizeof *pd, GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn); - if (err) { - kfree(pd); - return 
ERR_PTR(err); - } - - if (context) - if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) { - mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); - kfree(pd); - return ERR_PTR(-EFAULT); - } + if (err) + return err; - return &pd->ibpd; + if (udata && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) { + mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); + return -EFAULT; + } + return 0; } -static int mlx4_ib_dealloc_pd(struct ib_pd *pd) +static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); - kfree(pd); - - return 0; } static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_ucontext *context, struct ib_udata *udata) { struct mlx4_ib_xrcd *xrcd; @@ -1294,7 +1200,7 @@ return ERR_PTR(err); } -static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { ib_destroy_cq(to_mxrcd(xrcd)->cq); ib_dealloc_pd(to_mxrcd(xrcd)->pd); @@ -1732,7 +1638,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, - int domain) + int domain, struct ib_udata *udata) { int err = 0, i = 0, j = 0; struct mlx4_ib_flow *mflow; @@ -1747,6 +1653,10 @@ (flow_attr->type != IB_FLOW_ATTR_NORMAL)) return ERR_PTR(-EOPNOTSUPP); + if (udata && + udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen)) + return ERR_PTR(-EOPNOTSUPP); + memset(type, 0, sizeof(type)); mflow = kzalloc(sizeof(*mflow), GFP_KERNEL); @@ -2550,6 +2460,7 @@ ibdev->dev = dev; ibdev->bond_next_port = 0; + INIT_IB_DEVICE_OPS(&ibdev->ib_dev.ops, mlx4, MLX4); strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_mr.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_mr.c --- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_mr.c +++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_mr.c @@ -323,7 +323,7 @@ } } -int mlx4_ib_dereg_mr(struct ib_mr *ibmr) +int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct mlx4_ib_mr *mr = to_mmr(ibmr); int ret; @@ -383,9 +383,8 @@ return 0; } -struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mr *mr; @@ -409,7 +408,6 @@ goto err_free_mr; mr->max_pages = max_num_sg; - err = mlx4_mr_enable(dev->dev, &mr->mmr); if (err) goto err_free_pl; @@ -420,6 +418,7 @@ return &mr->ibmr; err_free_pl: + mr->ibmr.device = pd->device; mlx4_free_priv_pages(mr); err_free_mr: (void) mlx4_mr_free(dev->dev, &mr->mmr); diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c --- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c +++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -1022,7 +1023,7 @@ } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, - int is_user) + struct ib_udata *udata) { struct mlx4_ib_cq *send_cq, *recv_cq; unsigned long flags; @@ -1064,7 +1065,7 @@ list_del(&qp->qps_list); list_del(&qp->cq_send_list); list_del(&qp->cq_recv_list); - if (!is_user) { + if (!udata) { __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, qp->ibqp.srq ? 
to_msrq(qp->ibqp.srq): NULL); if (send_cq != recv_cq) @@ -1087,11 +1088,16 @@ mlx4_mtt_cleanup(dev->dev, &qp->mtt); - if (is_user) { - if (qp->rq.wqe_cnt) - mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), - &qp->db); - ib_umem_release(qp->umem); + if (udata) { + if (qp->rq.wqe_cnt) { + struct mlx4_ib_ucontext *mcontext = + rdma_udata_to_drv_context( + udata, + struct mlx4_ib_ucontext, + ibucontext); + + mlx4_ib_db_unmap_user(mcontext, &qp->db); + } } else { kvfree(qp->sq.wrid); kvfree(qp->rq.wrid); @@ -1102,6 +1108,7 @@ if (qp->rq.wqe_cnt) mlx4_db_free(dev->dev, &qp->db); } + ib_umem_release(qp->umem); del_gid_entries(qp); } @@ -1273,7 +1280,7 @@ return ibqp; } -static int _mlx4_ib_destroy_qp(struct ib_qp *qp) +static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); @@ -1292,7 +1299,7 @@ mlx4_ib_free_qp_counter(dev, mqp); pd = get_pd(mqp); - destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); + destroy_qp_common(dev, mqp, udata); if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); @@ -1302,7 +1309,7 @@ return 0; } -int mlx4_ib_destroy_qp(struct ib_qp *qp) +int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct mlx4_ib_qp *mqp = to_mqp(qp); @@ -1313,7 +1320,7 @@ ib_destroy_qp(sqp->roce_v2_gsi); } - return _mlx4_ib_destroy_qp(qp); + return _mlx4_ib_destroy_qp(qp, udata); } static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type) @@ -1618,12 +1625,16 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, - enum ib_qp_state cur_state, enum ib_qp_state new_state) + enum ib_qp_state cur_state, + enum ib_qp_state new_state, + struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_ib_pd *pd; struct mlx4_ib_cq *send_cq, *recv_cq; + struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx4_ib_ucontext, ibucontext); struct mlx4_qp_context *context; enum mlx4_qp_optpar optpar = 0; int sqd_event; @@ -1699,10 +1710,9 @@ context->param3 |= cpu_to_be32(1 << 30); } - if (qp->ibqp.uobject) + if (ucontext) context->usr_page = cpu_to_be32( - mlx4_to_hw_uar_index(dev->dev, - to_mucontext(ibqp->uobject->context)->uar.index)); + mlx4_to_hw_uar_index(dev->dev, ucontext->uar.index)); else context->usr_page = cpu_to_be32( mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index)); @@ -2242,7 +2252,7 @@ goto out; } - err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state, udata); if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) attr->port_num = 1; diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_srq.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_srq.c --- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_srq.c +++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_srq.c @@ -37,6 +37,7 @@ #include "mlx4_ib.h" #include +#include static void *get_wqe(struct mlx4_ib_srq *srq, int n) { @@ -68,12 +69,14 @@ } } -struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int mlx4_ib_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx4_ib_dev *dev = to_mdev(pd->device); - struct mlx4_ib_srq *srq; + struct mlx4_ib_dev *dev = to_mdev(ib_srq->device); + struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx4_ib_ucontext, ibucontext); + struct mlx4_ib_srq *srq = 
to_msrq(ib_srq); struct mlx4_wqe_srq_next_seg *next; struct mlx4_wqe_data_seg *scatter; u32 cqn; @@ -86,11 +89,7 @@ /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes || init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) - return ERR_PTR(-EINVAL); - - srq = kmalloc(sizeof *srq, GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); + return -EINVAL; mutex_init(&srq->mutex); spin_lock_init(&srq->lock); @@ -105,20 +104,16 @@ buf_size = srq->msrq.max * desc_size; - if (pd->uobject) { + if (udata) { struct mlx4_ib_create_srq ucmd; - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { - err = -EFAULT; - goto err_srq; - } + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; - srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - buf_size, 0, 0); - if (IS_ERR(srq->umem)) { - err = PTR_ERR(srq->umem); - goto err_srq; - } + srq->umem = + ib_umem_get(&ucontext->ibucontext, ucmd.buf_addr, buf_size, 0, 0); + if (IS_ERR(srq->umem)) + return PTR_ERR(srq->umem); err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem), ilog2(srq->umem->page_size), &srq->mtt); @@ -129,14 +124,14 @@ if (err) goto err_mtt; - err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), + err = mlx4_ib_db_map_user(ucontext, ucmd.db_addr, &srq->db); if (err) goto err_mtt; } else { err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL); if (err) - goto err_srq; + return err; *srq->db.db = 0; @@ -183,19 +178,19 @@ } cqn = (init_attr->srq_type == IB_SRQT_XRC) ? - to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0; + to_mcq(init_attr->ext.cq)->mcq.cqn : 0; xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ? to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn : (u16) dev->dev->caps.reserved_xrcds; - err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt, - srq->db.dma, &srq->msrq); + err = mlx4_srq_alloc(dev->dev, to_mpd(ib_srq->pd)->pdn, cqn, xrcdn, + &srq->mtt, srq->db.dma, &srq->msrq); if (err) goto err_wrid; srq->msrq.event = mlx4_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (pd->uobject) + if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { err = -EFAULT; goto err_wrid; @@ -203,11 +198,11 @@ init_attr->attr.max_wr = srq->msrq.max - 1; - return &srq->ibsrq; + return 0; err_wrid: - if (pd->uobject) - mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); + if (udata) + mlx4_ib_db_unmap_user(ucontext, &srq->db); else kvfree(srq->wrid); @@ -215,19 +210,15 @@ mlx4_mtt_cleanup(dev->dev, &srq->mtt); err_buf: - if (pd->uobject) - ib_umem_release(srq->umem); - else + if (!srq->umem) mlx4_buf_free(dev->dev, buf_size, &srq->buf); + ib_umem_release(srq->umem); err_db: - if (!pd->uobject) + if (!udata) mlx4_db_free(dev->dev, &srq->db); -err_srq: - kfree(srq); - - return ERR_PTR(err); + return err; } int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -274,7 +265,7 @@ return 0; } -int mlx4_ib_destroy_srq(struct ib_srq *srq) +void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(srq->device); struct mlx4_ib_srq *msrq = to_msrq(srq); @@ -282,19 +273,20 @@ mlx4_srq_free(dev->dev, &msrq->msrq); mlx4_mtt_cleanup(dev->dev, &msrq->mtt); - if (srq->uobject) { - mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); - ib_umem_release(msrq->umem); + if (udata) { + mlx4_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx4_ib_ucontext, + ibucontext), + &msrq->db); } else { kvfree(msrq->wrid); mlx4_buf_free(dev->dev, 
msrq->msrq.max << msrq->msrq.wqe_shift, &msrq->buf); mlx4_db_free(dev->dev, &msrq->db); } - - kfree(msrq); - - return 0; + ib_umem_release(msrq->umem); } void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) diff --git a/sys/dev/mlx5/device.h b/sys/dev/mlx5/device.h --- a/sys/dev/mlx5/device.h +++ b/sys/dev/mlx5/device.h @@ -299,6 +299,7 @@ MLX5_EVENT_QUEUE_TYPE_QP = 0, MLX5_EVENT_QUEUE_TYPE_RQ = 1, MLX5_EVENT_QUEUE_TYPE_SQ = 2, + MLX5_EVENT_QUEUE_TYPE_DCT = 6, }; enum { @@ -502,7 +503,9 @@ }; struct mlx5_eqe_qp_srq { - __be32 reserved[6]; + __be32 reserved1[5]; + u8 type; + u8 reserved2[3]; __be32 qp_srq_n; }; @@ -512,6 +515,12 @@ u8 syndrome; }; +struct mlx5_eqe_xrq_err { + __be32 reserved1[5]; + __be32 type_xrqn; + __be32 reserved2; +}; + struct mlx5_eqe_port_state { u8 reserved0[8]; u8 port; @@ -595,6 +604,11 @@ u32 rsvd0[6]; }; +struct mlx5_eqe_dct { + __be32 reserved[6]; + __be32 dctn; +}; + struct mlx5_eqe_temp_warning { __be64 sensor_warning_msb; __be64 sensor_warning_lsb; @@ -614,7 +628,9 @@ struct mlx5_eqe_port_module_event port_module_event; struct mlx5_eqe_vport_change vport_change; struct mlx5_eqe_general_notification_event general_notifications; + struct mlx5_eqe_dct dct; struct mlx5_eqe_temp_warning temp_warning; + struct mlx5_eqe_xrq_err xrq_err; } __packed; struct mlx5_eqe { @@ -957,6 +973,7 @@ MLX5_CAP_DMC, MLX5_CAP_DEC, MLX5_CAP_TLS, + MLX5_CAP_DEV_EVENT = 0x14, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1120,6 +1137,9 @@ #define MLX5_CAP_TLS(mdev, cap) \ MLX5_GET(tls_capabilities, (mdev)->hca_caps_cur[MLX5_CAP_TLS], cap) +#define MLX5_CAP_DEV_EVENT(mdev, cap)\ + MLX5_ADDR_OF(device_event_cap, (mdev)->hca_caps_cur[MLX5_CAP_DEV_EVENT], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h --- a/sys/dev/mlx5/driver.h +++ b/sys/dev/mlx5/driver.h @@ -399,7 +399,7 @@ enum { MLX5_MKEY_MR = 1, MLX5_MKEY_MW, - MLX5_MKEY_MR_USER, + MLX5_MKEY_INDIRECT_DEVX, }; struct mlx5_core_mkey { @@ -410,20 +410,14 @@ u32 type; }; -struct mlx5_core_mr { - u64 iova; - u64 size; - u32 key; - u32 pd; -}; - enum mlx5_res_type { MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP, MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ, MLX5_RES_SQ = MLX5_EVENT_QUEUE_TYPE_SQ, MLX5_RES_SRQ = 3, MLX5_RES_XSRQ = 4, - MLX5_RES_DCT = 5, + MLX5_RES_XRQ = 5, + MLX5_RES_DCT = MLX5_EVENT_QUEUE_TYPE_DCT, }; struct mlx5_core_rsc_common { @@ -477,6 +471,7 @@ struct completion free; }; +struct mlx5_ib_dev; struct mlx5_eq_table { void __iomem *update_ci; void __iomem *update_arm_ci; @@ -485,9 +480,10 @@ struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; int num_comp_vectors; - /* protect EQs list - */ - spinlock_t lock; + spinlock_t lock; /* protect EQs list */ + struct mlx5_ib_dev *dev; /* for devx event notifier */ + bool (*cb)(struct mlx5_core_dev *mdev, + uint8_t event_type, void *data); }; struct mlx5_core_health { @@ -999,21 +995,21 @@ void mlx5_init_mr_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, - struct mlx5_core_mr *mkey, + struct mlx5_core_mkey *mkey, struct mlx5_async_ctx *async_ctx, u32 *in, int inlen, u32 *out, int outlen, mlx5_async_cbk_t callback, struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mr *mr, + struct mlx5_core_mkey *mr, u32 *in, int inlen); -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey); -int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct 
mlx5_core_mr *mkey, +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey); +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *out, int outlen); -int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mr, u32 *mkey); -int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); -int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); +int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn, u16 uid); +int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); void mlx5_fwp_flush(struct mlx5_fw_page *fwp); diff --git a/sys/dev/mlx5/mlx5_core/mlx5_eq.c b/sys/dev/mlx5/mlx5_core/mlx5_eq.c --- a/sys/dev/mlx5/mlx5_core/mlx5_eq.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_eq.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2013-2021, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -244,7 +244,11 @@ mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", eq->eqn, eqe_type_str(eqe->type)); - switch (eqe->type) { + + if (dev->priv.eq_table.cb != NULL && + dev->priv.eq_table.cb(dev, eqe->type, &eqe->data)) { + /* event consumed by the devx notifier callback */ + } else switch (eqe->type) { case MLX5_EVENT_TYPE_COMP: mlx5_cq_completion(dev, eqe); break; diff --git a/sys/dev/mlx5/mlx5_core/mlx5_fw.c b/sys/dev/mlx5/mlx5_core/mlx5_fw.c --- a/sys/dev/mlx5/mlx5_core/mlx5_fw.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_fw.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -233,6 +233,12 @@ return err; } + if (MLX5_CAP_GEN(dev, event_cap)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_EVENT); + if (err) + return err; + } + err = mlx5_core_query_special_contexts(dev); if (err) return err; diff --git a/sys/dev/mlx5/mlx5_core/mlx5_mr.c b/sys/dev/mlx5/mlx5_core/mlx5_mr.c --- a/sys/dev/mlx5/mlx5_core/mlx5_mr.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_mr.c @@ -49,7 +49,7 @@ } int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, - struct mlx5_core_mr *mkey, + struct mlx5_core_mkey *mkey, struct mlx5_async_ctx *async_ctx, u32 *in, int inlen, u32 *out, int outlen, mlx5_async_cbk_t callback, @@ -111,7 +111,7 @@ EXPORT_SYMBOL(mlx5_core_create_mkey_cb); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mr *mkey, + struct mlx5_core_mkey *mkey, u32 *in, int inlen) { return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, @@ -119,12 +119,12 @@ } EXPORT_SYMBOL(mlx5_core_create_mkey); -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey) +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey) { struct mlx5_mr_table *table = &dev->priv.mr_table; u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; - struct mlx5_core_mr *deleted_mr; + struct mlx5_core_mkey *deleted_mr; unsigned long flags; spin_lock_irqsave(&table->lock, flags); @@ -142,7 +142,7 @@ } EXPORT_SYMBOL(mlx5_core_destroy_mkey); -int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey, +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *out, int outlen) { u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0}; @@ -155,7 +155,7 @@ } EXPORT_SYMBOL(mlx5_core_query_mkey); -int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *_mkey, +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey) { u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0}; diff --git a/sys/dev/mlx5/mlx5_core/mlx5_pd.c b/sys/dev/mlx5/mlx5_core/mlx5_pd.c --- a/sys/dev/mlx5/mlx5_core/mlx5_pd.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_pd.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -30,13 +30,14 @@ #include #include "mlx5_core.h" -int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn) +int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn, u16 uid) { u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {0}; u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0}; int err; MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); + MLX5_SET(alloc_pd_in, in, uid, uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -47,12 +48,13 @@ } EXPORT_SYMBOL(mlx5_core_alloc_pd); -int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn) +int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid) { u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {0}; u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0}; MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, in, uid, uid); MLX5_SET(dealloc_pd_in, in, pd, pdn); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); diff --git a/sys/dev/mlx5/mlx5_core/mlx5_tls.c b/sys/dev/mlx5/mlx5_core/mlx5_tls.c --- a/sys/dev/mlx5/mlx5_core/mlx5_tls.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_tls.c @@ -116,5 +116,5 @@ void mlx5_tls_close_tis(struct mlx5_core_dev *mdev, u32 tisn) { - mlx5_core_destroy_tis(mdev, tisn); + mlx5_core_destroy_tis(mdev, tisn, 0); } diff --git a/sys/dev/mlx5/mlx5_core/mlx5_transobj.c b/sys/dev/mlx5/mlx5_core/mlx5_transobj.c --- a/sys/dev/mlx5/mlx5_core/mlx5_transobj.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_transobj.c @@ -30,7 +30,7 @@ #include "mlx5_core.h" #include "transobj.h" -int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn) +int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, u32 uid) { u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0}; u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0}; @@ -38,6 +38,7 @@ MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); + MLX5_SET(alloc_transport_domain_in, in, uid, uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) @@ -47,7 +48,7 @@ return err; } -void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn) +void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, u32 uid) { u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0}; u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0}; @@ -55,6 +56,7 @@ MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); + MLX5_SET(dealloc_transport_domain_in, in, uid, uid); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -164,12 +166,13 @@ return err; } -void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn) +void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u32 uid) { u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {0}; u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {0}; MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, in, uid, uid); MLX5_SET(destroy_tir_in, in, tirn, tirn); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); @@ -201,12 +204,13 @@ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) +void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u32 uid) { u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0}; u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0}; MLX5_SET(destroy_tis_in, in, opcode, 
MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, uid, uid); MLX5_SET(destroy_tis_in, in, tisn, tisn); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); @@ -374,12 +378,13 @@ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn) +void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 uid) { u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0}; u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0}; MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, in, uid, uid); MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); diff --git a/sys/dev/mlx5/mlx5_core/transobj.h b/sys/dev/mlx5/mlx5_core/transobj.h --- a/sys/dev/mlx5/mlx5_core/transobj.h +++ b/sys/dev/mlx5/mlx5_core/transobj.h @@ -28,8 +28,8 @@ #ifndef __TRANSOBJ_H__ #define __TRANSOBJ_H__ -int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn); -void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn); +int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, u32 uid); +void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, u32 uid); int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn); int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 *in, int inlen); @@ -42,12 +42,12 @@ int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out); int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn); -void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); +void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u32 uid); int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn); int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in, int inlen); -void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); +void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u32 uid); int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rmpn); int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen); int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn); @@ -62,6 +62,6 @@ u32 *rqtn); int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, int inlen); -void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn); +void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 uid); #endif /* __TRANSOBJ_H__ */ diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h --- a/sys/dev/mlx5/mlx5_en/en.h +++ b/sys/dev/mlx5/mlx5_en/en.h @@ -1021,7 +1021,7 @@ struct sx state_lock; /* Protects Interface state */ u32 pdn; u32 tdn; - struct mlx5_core_mr mr; + struct mlx5_core_mkey mr; u32 tisn[MLX5E_MAX_TX_NUM_TC]; u32 rqtn; diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -2636,7 +2636,7 @@ static void mlx5e_close_tis(struct mlx5e_priv *priv, int tc) { - mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]); + mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc], 0); } static int @@ -2965,7 +2965,7 @@ mlx5e_close_tir(struct mlx5e_priv *priv, int tt, bool inner_vxlan) { mlx5_core_destroy_tir(priv->mdev, inner_vxlan ? 
- priv->tirn_inner_vxlan[tt] : priv->tirn[tt]); + priv->tirn_inner_vxlan[tt] : priv->tirn[tt], 0); } static int @@ -3740,7 +3740,7 @@ static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, - struct mlx5_core_mr *mkey) + struct mlx5_core_mkey *mkey) { struct ifnet *ifp = priv->ifp; struct mlx5_core_dev *mdev = priv->mdev; @@ -4538,12 +4538,12 @@ /* reuse mlx5core's watchdog workqueue */ priv->wq = mdev->priv.health.wq_watchdog; - err = mlx5_core_alloc_pd(mdev, &priv->pdn); + err = mlx5_core_alloc_pd(mdev, &priv->pdn, 0); if (err) { mlx5_en_err(ifp, "mlx5_core_alloc_pd failed, %d\n", err); goto err_free_wq; } - err = mlx5_alloc_transport_domain(mdev, &priv->tdn); + err = mlx5_alloc_transport_domain(mdev, &priv->tdn, 0); if (err) { mlx5_en_err(ifp, "mlx5_alloc_transport_domain failed, %d\n", err); @@ -4709,10 +4709,10 @@ mlx5_core_destroy_mkey(priv->mdev, &priv->mr); err_dealloc_transport_domain: - mlx5_dealloc_transport_domain(mdev, priv->tdn); + mlx5_dealloc_transport_domain(mdev, priv->tdn, 0); err_dealloc_pd: - mlx5_core_dealloc_pd(mdev, priv->pdn); + mlx5_core_dealloc_pd(mdev, priv->pdn, 0); err_free_wq: flush_workqueue(priv->wq); @@ -4808,8 +4808,8 @@ sysctl_ctx_free(&priv->sysctl_ctx); mlx5_core_destroy_mkey(priv->mdev, &priv->mr); - mlx5_dealloc_transport_domain(priv->mdev, priv->tdn); - mlx5_core_dealloc_pd(priv->mdev, priv->pdn); + mlx5_dealloc_transport_domain(priv->mdev, priv->tdn, 0); + mlx5_core_dealloc_pd(priv->mdev, priv->pdn, 0); mlx5e_disable_async_events(priv); flush_workqueue(priv->wq); mlx5e_priv_static_destroy(priv, mdev, mdev->priv.eq_table.num_comp_vectors); diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c --- a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c @@ -634,7 +634,7 @@ static void mlx5e_rl_close_tis(struct mlx5e_priv *priv) { - mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn); + mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn, 0); } static void diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ #include #include #include +#include #define mlx5_ib_dbg(dev, format, arg...) 
\ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -66,15 +68,6 @@ MLX5_IB_MMAP_CMD_MASK = 0xff, }; -enum mlx5_ib_mmap_cmd { - MLX5_IB_MMAP_REGULAR_PAGE = 0, - MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, - MLX5_IB_MMAP_WC_PAGE = 2, - MLX5_IB_MMAP_NC_PAGE = 3, - /* 5 is chosen in order to be compatible with old versions of libmlx5 */ - MLX5_IB_MMAP_CORE_CLOCK = 5, -}; - enum { MLX5_RES_SCAT_DATA32_CQE = 0x1, MLX5_RES_SCAT_DATA64_CQE = 0x2, @@ -109,9 +102,11 @@ MLX5_IB_INVALID_BFREG = BIT(31), }; -struct mlx5_ib_vma_private_data { - struct list_head list; - struct vm_area_struct *vma; +enum mlx5_ib_mmap_type { + MLX5_IB_MMAP_TYPE_MEMIC = 1, + MLX5_IB_MMAP_TYPE_VAR = 2, + MLX5_IB_MMAP_TYPE_UAR_WC = 3, + MLX5_IB_MMAP_TYPE_UAR_NC = 4, }; struct mlx5_bfreg_info { @@ -143,7 +138,9 @@ u8 cqe_version; /* Transport Domain number */ u32 tdn; - struct list_head vma_private_list; + + u64 lib_caps; + u16 devx_uid; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -154,6 +151,7 @@ struct mlx5_ib_pd { struct ib_pd ibpd; u32 pdn; + u16 uid; }; #define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) @@ -279,6 +277,7 @@ struct mlx5_ib_rwq_ind_table { struct ib_rwq_ind_table ib_rwq_ind_tbl; u32 rqtn; + u16 uid; }; /* @@ -358,12 +357,18 @@ spinlock_t lock32; }; +struct mlx5_ib_dct { + struct mlx5_core_dct mdct; + u32 *in; +}; + struct mlx5_ib_qp { struct ib_qp ibqp; union { struct mlx5_ib_qp_trans trans_qp; struct mlx5_ib_raw_packet_qp raw_packet_qp; struct mlx5_ib_rss_qp rss_qp; + struct mlx5_ib_dct dct; }; struct mlx5_buf buf; @@ -433,6 +438,7 @@ MLX5_IB_QP_SQPN_QP1 = 1 << 6, MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, MLX5_IB_QP_RSS = 1 << 8, + MLX5_IB_QP_UNDERLAY = 1 << 10, }; struct mlx5_umr_wr { @@ -517,6 +523,13 @@ MLX5_IB_MTT_WRITE = (1 << 1), }; +struct mlx5_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + u8 mmap_flag; + u64 address; + u32 page_idx; +}; + #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) struct mlx5_ib_mr { @@ -527,7 +540,7 @@ int max_descs; int desc_size; int access_mode; - struct mlx5_core_mr mmkey; + struct mlx5_core_mkey mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; @@ -545,7 +558,12 @@ struct mlx5_ib_mw { struct ib_mw ibmw; - struct mlx5_core_mr mmkey; + struct mlx5_core_mkey mmkey; +}; + +struct mlx5_ib_devx_mr { + struct mlx5_core_mkey mmkey; + int ndescs; }; struct mlx5_ib_umr_context { @@ -730,6 +748,12 @@ }; }; +struct mlx5_devx_event_table { + /* serialize updating the event_xa */ + struct mutex event_xa_lock; + struct xarray event_xa; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -739,7 +763,8 @@ /* serialize update of capability mask */ struct mutex cap_mask_mutex; - bool ib_active; + u8 ib_active:1; + u8 wc_support:1; struct umr_common umrc; /* sync used page count stats */ @@ -766,6 +791,7 @@ struct mlx5_sq_bfreg bfreg; struct mlx5_sq_bfreg wc_bfreg; struct mlx5_sq_bfreg fp_bfreg; + struct mlx5_devx_event_table devx_event_table; struct mlx5_ib_congestion congestion; struct mlx5_async_ctx async_ctx; @@ -786,6 +812,14 @@ return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } +static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata) +{ + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + return to_mdev(context->ibucontext.device); +} + static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, 
struct mlx5_ib_cq, ibcq); @@ -801,7 +835,7 @@ return container_of(core_qp, struct mlx5_ib_rwq, core_qp); } -static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmkey) +static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) { return container_of(mmkey, struct mlx5_ib_mr, mmkey); } @@ -856,6 +890,13 @@ return container_of(ibah, struct mlx5_ib_ah, ibah); } +static inline struct mlx5_user_mmap_entry * +to_mmmap(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, + struct mlx5_user_mmap_entry, rdma_entry); +} + int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db); void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); @@ -865,17 +906,16 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, +int mlx5_ib_create_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); -int mlx5_ib_destroy_ah(struct ib_ah *ah); -struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); +void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); +int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); -int mlx5_ib_destroy_srq(struct ib_srq *srq); +void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, @@ -885,7 +925,7 @@ int attr_mask, struct ib_udata *udata); int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int mlx5_ib_destroy_qp(struct ib_qp *qp); +int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, @@ -894,11 +934,9 @@ int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length, struct mlx5_ib_qp_base *base); -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata); -int mlx5_ib_destroy_cq(struct ib_cq *cq); +int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); @@ -915,10 +953,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); -int mlx5_ib_dereg_mr(struct ib_mr *ibmr); -struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +int 
mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -927,9 +964,8 @@ struct ib_mad_hdr *out, size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata); -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd); + struct ib_udata *udata); +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, @@ -971,7 +1007,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -int mlx5_ib_destroy_wq(struct ib_wq *wq); +void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, @@ -1043,6 +1079,28 @@ void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn); + +#if 1 /* IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) */ +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); +void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev); +void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev); +bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id); +#else +static inline int +mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + bool is_user) { return -EOPNOTSUPP; } +static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} +static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {} +static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {} +static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, + int *dest_type) +{ + return false; +} +#endif + static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_ah.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_ah.c --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_ah.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_ah.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -27,7 +27,7 @@ #include "mlx5_ib.h" -static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, +static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, struct ib_ah_attr *ah_attr, enum rdma_link_layer ll) @@ -55,22 +55,20 @@ ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f; ah->av.stat_rate_sl |= (ah_attr->sl & 0xf); } - - return &ah->ibah; } -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, - struct ib_udata *udata) +int mlx5_ib_create_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - struct mlx5_ib_ah *ah; - struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_ah *ah = to_mah(ibah); + struct mlx5_ib_dev *dev = to_mdev(ibah->device); enum rdma_link_layer ll; - ll = pd->device->get_link_layer(pd->device, ah_attr->port_num); + ll = dev->ib_dev.get_link_layer(&dev->ib_dev, ah_attr->port_num); if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (ll == IB_LINK_LAYER_ETHERNET && udata) { int err; @@ -79,25 +77,22 @@ sizeof(resp.dmac); if (udata->outlen < min_resp_len) - return ERR_PTR(-EINVAL); + return -EINVAL; resp.response_length = min_resp_len; - err = ib_resolve_eth_dmac(pd->device, ah_attr); + err = ib_resolve_eth_dmac(&dev->ib_dev, ah_attr); if (err) - return ERR_PTR(err); + return err; memcpy(resp.dmac, ah_attr->dmac, ETH_ALEN); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) - return ERR_PTR(err); + return err; } - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); - - return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */ + create_ib_ah(dev, ah, ah_attr, ll); + return 0; } int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) @@ -123,8 +118,7 @@ return 0; } -int mlx5_ib_destroy_ah(struct ib_ah *ah) +void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) { - kfree(to_mah(ah)); - return 0; + return; } diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2013-2020, Mellanox Technologies. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -29,6 +29,7 @@ #include #include #include +#include #include "mlx5_ib.h" static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe __unused) @@ -519,7 +520,7 @@ struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; struct mlx5_sig_err_cqe *sig_err_cqe; - struct mlx5_core_mr *mmkey; + struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; unsigned long flags; uint8_t opcode; @@ -741,11 +742,10 @@ } static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, - struct ib_ucontext *context, struct mlx5_ib_cq *cq, - int entries, u32 **cqb, + struct mlx5_ib_cq *cq, int entries, u32 **cqb, int *cqe_size, int *index, int *inlen) { - struct mlx5_ib_create_cq ucmd; + struct mlx5_ib_create_cq ucmd = {}; size_t ucmdlen; int page_shift; __be64 *pas; @@ -753,6 +753,8 @@ int ncont; void *cqc; int err; + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); ucmdlen = min(udata->inlen, sizeof(ucmd)); if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags)) @@ -769,7 +771,7 @@ *cqe_size = ucmd.cqe_size; - cq->buf.umem = ib_umem_get(context, ucmd.buf_addr, + cq->buf.umem = ib_umem_get(&context->ibucontext, ucmd.buf_addr, entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(cq->buf.umem)) { @@ -777,7 +779,7 @@ return err; } - err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr, + err = mlx5_ib_db_map_user(context, ucmd.db_addr, &cq->db); if (err) goto err_umem; @@ -804,29 +806,33 @@ if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { *index = ucmd.uar_page_index; - } else if (to_mucontext(context)->bfregi.lib_uar_dyn) { + } else if (context->bfregi.lib_uar_dyn) { err = -EINVAL; goto err_cqb; } else { - *index = to_mucontext(context)->bfregi.sys_pages[0]; + *index = context->bfregi.sys_pages[0]; } + MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid); return 0; err_cqb: kvfree(*cqb); err_db: - mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + mlx5_ib_db_unmap_user(context, &cq->db); err_umem: ib_umem_release(cq->buf.umem); return err; } -static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context) +static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata) { - mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + mlx5_ib_db_unmap_user(context, &cq->db); ib_umem_release(cq->buf.umem); } @@ -906,16 +912,15 @@ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata) +int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; int vector = attr->comp_vector; struct mlx5_ib_dev *dev = to_mdev(ibdev); u32 out[MLX5_ST_SZ_DW(create_cq_out)]; - struct mlx5_ib_cq *cq; + struct mlx5_ib_cq *cq = to_mcq(ibcq); int uninitialized_var(index); int uninitialized_var(inlen); u32 *cqb = NULL; @@ -927,18 +932,14 @@ if (entries < 0 || (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))) - return ERR_PTR(-EINVAL); + return -EINVAL; if (check_cq_create_flags(attr->flags)) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; entries = roundup_pow_of_two(entries + 1); 
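The conversion in progress here moves mlx5_ib_create_cq() to the core-allocated object model: the uverbs core now allocates the ib_cq container (note the removed kzalloc/kfree pairs), the driver only initializes it, and the user-versus-kernel decision is keyed off udata instead of a separate ib_ucontext parameter. A minimal sketch of the resulting driver contract, with hypothetical my_cq/my_ucontext structures standing in for the mlx5 ones:

/*
 * Sketch of the post-conversion create hook.  The ib_cq member is
 * embedded first so the core can allocate the containing object and
 * hand it to the driver already zeroed.
 */
struct my_ucontext { struct ib_ucontext ibucontext; };
struct my_cq { struct ib_cq ibcq; };

static int my_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
    struct ib_udata *udata)
{
	struct my_cq *cq = container_of(ibcq, struct my_cq, ibcq);
	/* NULL for kernel consumers; the driver-specific ucontext is
	 * recovered from udata rather than passed explicitly. */
	struct my_ucontext *uctx = rdma_udata_to_drv_context(udata,
	    struct my_ucontext, ibucontext);

	if (uctx != NULL) {
		/* user path: parse the create request carried in udata */
	} else {
		/* kernel path: pick defaults, no user copies needed */
	}
	/* On failure, just return an errno: the core frees ibcq, which
	 * is why the kfree() calls disappear from the error paths. */
	(void)cq;
	return 0;
}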
if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) - return ERR_PTR(-EINVAL); - - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); + return -EINVAL; cq->ibcq.cqe = entries - 1; mutex_init(&cq->resize_mutex); @@ -949,17 +950,17 @@ INIT_LIST_HEAD(&cq->list_send_qp); INIT_LIST_HEAD(&cq->list_recv_qp); - if (context) { - err = create_cq_user(dev, udata, context, cq, entries, - &cqb, &cqe_size, &index, &inlen); + if (udata) { + err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size, + &index, &inlen); if (err) - goto err_create; + return err; } else { cqe_size = cache_line_size() == 128 ? 128 : 64; err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb, &index, &inlen); if (err) - goto err_create; + return err; INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } @@ -990,7 +991,7 @@ INIT_LIST_HEAD(&cq->wc_list); - if (context) + if (udata) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { err = -EFAULT; goto err_cmd; @@ -998,43 +999,30 @@ kvfree(cqb); - return &cq->ibcq; + return 0; err_cmd: mlx5_core_destroy_cq(dev->mdev, &cq->mcq); err_cqb: kvfree(cqb); - if (context) - destroy_cq_user(cq, context); + if (udata) + destroy_cq_user(cq, udata); else destroy_cq_kernel(dev, cq); - -err_create: - kfree(cq); - - return ERR_PTR(err); + return err; } - -int mlx5_ib_destroy_cq(struct ib_cq *cq) +void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); - struct ib_ucontext *context = NULL; - - if (cq->uobject) - context = cq->uobject->context; mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); - if (context) - destroy_cq_user(mcq, context); + if (udata) + destroy_cq_user(mcq, udata); else destroy_cq_kernel(dev, mcq); - - kfree(mcq); - - return 0; } static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn) diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_devx.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_devx.c new file mode 100644 --- /dev/null +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_devx.c @@ -0,0 +1,2930 @@ +/*- + * Copyright (c) 2018-2020, Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
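mlx5_ib_devx.c is the centerpiece of this change: a DEVX-capable ucontext is registered with the firmware through the CREATE_UCTX command (mlx5_ib_devx_create() below) and receives a uid, which is then stamped into the header of every command that touches that context's objects so the firmware can attribute, validate, and garbage-collect them per user. This is also why the mlx5_core helpers earlier in the diff (alloc_pd, transport domains, TIR/TIS/RQT destroy) grew a uid parameter. A hedged sketch of the plumbing; example_alloc_user_pd is a hypothetical helper, but mlx5_core_alloc_pd() and the devx_uid field are the ones added above:

/*
 * Allocate a PD on behalf of a user context.  c->devx_uid is non-zero
 * only when the context was opened with DEVX support; plain kernel
 * owners (e.g. the mlx5_en callers above) pass a literal 0.
 */
static int example_alloc_user_pd(struct mlx5_ib_dev *dev,
    struct mlx5_ib_ucontext *c, u32 *pdn)
{
	return mlx5_core_alloc_pd(dev->mdev, pdn, c->devx_uid);
}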
+ * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mlx5_ib.h" + +#include + +#include +#include +#include +#include +#include +#include + +#define UVERBS_MODULE_NAME mlx5_ib +#include + +static void dispatch_event_fd(struct list_head *fd_list, const void *data); + +enum devx_obj_flags { + DEVX_OBJ_FLAGS_DCT = 1 << 1, + DEVX_OBJ_FLAGS_CQ = 1 << 2, +}; + +struct devx_async_data { + struct mlx5_ib_dev *mdev; + struct list_head list; + struct devx_async_cmd_event_file *ev_file; + struct mlx5_async_work cb_work; + u16 cmd_out_len; + /* must be last field in this structure */ + struct mlx5_ib_uapi_devx_async_cmd_hdr hdr; +}; + +struct devx_async_event_data { + struct list_head list; /* headed in ev_file->event_list */ + struct mlx5_ib_uapi_devx_async_event_hdr hdr; +}; + +/* first level XA value data structure */ +struct devx_event { + struct xarray object_ids; /* second XA level, Key = object id */ + struct list_head unaffiliated_list; +}; + +/* second level XA value data structure */ +struct devx_obj_event { + struct rcu_head rcu; + struct list_head obj_sub_list; +}; + +struct devx_event_subscription { + struct list_head file_list; /* headed in ev_file-> + * subscribed_events_list + */ + struct list_head xa_list; /* headed in devx_event->unaffiliated_list or + * devx_obj_event->obj_sub_list + */ + struct list_head obj_list; /* headed in devx_object */ + struct list_head event_list; /* headed in ev_file->event_list or in + * temp list via subscription + */ + + u8 is_cleaned:1; + u32 xa_key_level1; + u32 xa_key_level2; + struct rcu_head rcu; + u64 cookie; + struct devx_async_event_file *ev_file; + struct fd eventfd; +}; + +struct devx_async_event_file { + struct ib_uobject uobj; + /* Head of events that are subscribed to this FD */ + struct list_head subscribed_events_list; + spinlock_t lock; + wait_queue_head_t poll_wait; + struct list_head event_list; + struct mlx5_ib_dev *dev; + u8 omit_data:1; + u8 is_overflow_err:1; + u8 is_destroyed:1; +}; + +#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) +struct devx_obj { + struct mlx5_ib_dev *ib_dev; + u64 obj_id; + u32 dinlen; /* destroy inbox length */ + u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; + u32 flags; + union { + struct mlx5_ib_devx_mr devx_mr; + struct mlx5_core_dct core_dct; + struct mlx5_core_cq core_cq; + u32 flow_counter_bulk_size; + }; + struct list_head event_sub; /* holds devx_event_subscription entries */ +}; + +struct devx_umem { + struct mlx5_core_dev *mdev; + struct ib_umem *umem; + u32 page_offset; + int page_shift; + int ncont; + u32 dinlen; + u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; +}; + +struct devx_umem_reg_cmd { + void *in; + u32 inlen; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; +}; + +static struct mlx5_ib_ucontext * +devx_ufile2uctx(const struct uverbs_attr_bundle *attrs) +{ + return to_mucontext(ib_uverbs_get_ucontext(attrs)); +} + +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) +{ + u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + void *uctx; + int err; + u16 uid; + u32 cap = 0; + + /* 0 means not supported */ + if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx)) + return -EINVAL; + + uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx); + if (is_user && priv_check(curthread, PRIV_NET_RAW) == 0 && + (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX)) + cap |= MLX5_UCTX_CAP_RAW_TX; + if (is_user && priv_check(curthread, PRIV_DRIVER) == 
0 && + (MLX5_CAP_GEN(dev->mdev, uctx_cap) & + MLX5_UCTX_CAP_INTERNAL_DEV_RES)) + cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES; + + MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); + MLX5_SET(uctx, uctx, cap, cap); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return uid; +} + +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + + MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); + MLX5_SET(destroy_uctx_in, in, uid, uid); + + mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + switch (opcode) { + case MLX5_CMD_OP_DESTROY_TIR: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, + obj_id); + return true; + + case MLX5_CMD_OP_DESTROY_FLOW_TABLE: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox, + table_id); + return true; + default: + return false; + } +} + +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { + + if (offset && offset >= devx_obj->flow_counter_bulk_size) + return false; + + *counter_id = MLX5_GET(dealloc_flow_counter_in, + devx_obj->dinbox, + flow_counter_id); + *counter_id += offset; + return true; + } + + return false; +} + +static bool is_legacy_unaffiliated_event_num(u16 event_num) +{ + switch (event_num) { + case MLX5_EVENT_TYPE_PORT_CHANGE: + return true; + default: + return false; + } +} + +static bool is_legacy_obj_event_num(u16 event_num) +{ + switch (event_num) { + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + case MLX5_EVENT_TYPE_CQ_ERROR: + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_DCT_DRAINED: + case MLX5_EVENT_TYPE_COMP: + case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION: + case MLX5_EVENT_TYPE_XRQ_ERROR: + return true; + default: + return false; + } +} + +static u16 get_legacy_obj_type(u16 opcode) +{ + switch (opcode) { + case MLX5_CMD_OP_CREATE_RQ: + return MLX5_EVENT_QUEUE_TYPE_RQ; + case MLX5_CMD_OP_CREATE_QP: + return MLX5_EVENT_QUEUE_TYPE_QP; + case MLX5_CMD_OP_CREATE_SQ: + return MLX5_EVENT_QUEUE_TYPE_SQ; + case MLX5_CMD_OP_CREATE_DCT: + return MLX5_EVENT_QUEUE_TYPE_DCT; + default: + return 0; + } +} + +static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num) +{ + u16 opcode; + + opcode = (obj->obj_id >> 32) & 0xffff; + + if (is_legacy_obj_event_num(event_num)) + return get_legacy_obj_type(opcode); + + switch (opcode) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJ: + return (obj->obj_id >> 48); + case MLX5_CMD_OP_CREATE_RQ: + return MLX5_OBJ_TYPE_RQ; + case MLX5_CMD_OP_CREATE_QP: + return MLX5_OBJ_TYPE_QP; + case MLX5_CMD_OP_CREATE_SQ: + return MLX5_OBJ_TYPE_SQ; + case MLX5_CMD_OP_CREATE_DCT: + return 
MLX5_OBJ_TYPE_DCT; + case MLX5_CMD_OP_CREATE_TIR: + return MLX5_OBJ_TYPE_TIR; + case MLX5_CMD_OP_CREATE_TIS: + return MLX5_OBJ_TYPE_TIS; + case MLX5_CMD_OP_CREATE_PSV: + return MLX5_OBJ_TYPE_PSV; + case MLX5_OBJ_TYPE_MKEY: + return MLX5_OBJ_TYPE_MKEY; + case MLX5_CMD_OP_CREATE_RMP: + return MLX5_OBJ_TYPE_RMP; + case MLX5_CMD_OP_CREATE_XRC_SRQ: + return MLX5_OBJ_TYPE_XRC_SRQ; + case MLX5_CMD_OP_CREATE_XRQ: + return MLX5_OBJ_TYPE_XRQ; + case MLX5_CMD_OP_CREATE_RQT: + return MLX5_OBJ_TYPE_RQT; + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + return MLX5_OBJ_TYPE_FLOW_COUNTER; + case MLX5_CMD_OP_CREATE_CQ: + return MLX5_OBJ_TYPE_CQ; + default: + return 0; + } +} + +static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe) +{ + switch (event_type) { + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + return eqe->data.qp_srq.type; + case MLX5_EVENT_TYPE_CQ_ERROR: + case MLX5_EVENT_TYPE_XRQ_ERROR: + return 0; + case MLX5_EVENT_TYPE_DCT_DRAINED: + case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION: + return MLX5_EVENT_QUEUE_TYPE_DCT; + default: + return MLX5_GET(affiliated_event_header, &eqe->data, obj_type); + } +} + +static u32 get_dec_obj_id(u64 obj_id) +{ + return (obj_id & 0xffffffff); +} + +/* + * As the obj_id in the firmware is not globally unique the object type + * must be considered upon checking for a valid object id. + * For that the opcode of the creator command is encoded as part of the obj_id. + */ +static u64 get_enc_obj_id(u32 opcode, u32 obj_id) +{ + return ((u64)opcode << 32) | obj_id; +} + +static u64 devx_get_obj_id(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + u64 obj_id; + + switch (opcode) { + case MLX5_CMD_OP_MODIFY_GENERAL_OBJ: + case MLX5_CMD_OP_QUERY_GENERAL_OBJ: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJ | + MLX5_GET(general_obj_in_cmd_hdr, in, + obj_type) << 16, + MLX5_GET(general_obj_in_cmd_hdr, in, + obj_id)); + break; + case MLX5_CMD_OP_QUERY_MKEY: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY, + MLX5_GET(query_mkey_in, in, + mkey_index)); + break; + case MLX5_CMD_OP_QUERY_CQ: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ, + MLX5_GET(query_cq_in, in, cqn)); + break; + case MLX5_CMD_OP_MODIFY_CQ: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ, + MLX5_GET(modify_cq_in, in, cqn)); + break; + case MLX5_CMD_OP_QUERY_SQ: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ, + MLX5_GET(query_sq_in, in, sqn)); + break; + case MLX5_CMD_OP_MODIFY_SQ: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ, + MLX5_GET(modify_sq_in, in, sqn)); + break; + case MLX5_CMD_OP_QUERY_RQ: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + MLX5_GET(query_rq_in, in, rqn)); + break; + case MLX5_CMD_OP_MODIFY_RQ: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + MLX5_GET(modify_rq_in, in, rqn)); + break; + case MLX5_CMD_OP_QUERY_RMP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP, + MLX5_GET(query_rmp_in, in, rmpn)); + break; + case MLX5_CMD_OP_MODIFY_RMP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP, + MLX5_GET(modify_rmp_in, in, rmpn)); + break; + case MLX5_CMD_OP_QUERY_RQT: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT, + MLX5_GET(query_rqt_in, in, rqtn)); + break; + case MLX5_CMD_OP_MODIFY_RQT: + obj_id = 
get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT, + MLX5_GET(modify_rqt_in, in, rqtn)); + break; + case MLX5_CMD_OP_QUERY_TIR: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR, + MLX5_GET(query_tir_in, in, tirn)); + break; + case MLX5_CMD_OP_MODIFY_TIR: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR, + MLX5_GET(modify_tir_in, in, tirn)); + break; + case MLX5_CMD_OP_QUERY_TIS: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS, + MLX5_GET(query_tis_in, in, tisn)); + break; + case MLX5_CMD_OP_MODIFY_TIS: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS, + MLX5_GET(modify_tis_in, in, tisn)); + break; + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE, + MLX5_GET(query_flow_table_in, in, + table_id)); + break; + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE, + MLX5_GET(modify_flow_table_in, in, + table_id)); + break; + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP, + MLX5_GET(query_flow_group_in, in, + group_id)); + break; + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY, + MLX5_GET(query_fte_in, in, + flow_index)); + break; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY, + MLX5_GET(set_fte_in, in, flow_index)); + break; + case MLX5_CMD_OP_QUERY_Q_COUNTER: + obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER, + MLX5_GET(query_q_counter_in, in, + counter_set_id)); + break; + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER, + MLX5_GET(query_flow_counter_in, in, + flow_counter_id)); + break; + case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: + obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT, + MLX5_GET(general_obj_in_cmd_hdr, in, + obj_id)); + break; + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT, + MLX5_GET(query_scheduling_element_in, + in, scheduling_element_id)); + break; + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT, + MLX5_GET(modify_scheduling_element_in, + in, scheduling_element_id)); + break; + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT, + MLX5_GET(add_vxlan_udp_dport_in, in, + vxlan_udp_port)); + break; + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY, + MLX5_GET(query_l2_table_entry_in, in, + table_index)); + break; + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY, + MLX5_GET(set_l2_table_entry_in, in, + table_index)); + break; + case MLX5_CMD_OP_QUERY_QP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(query_qp_in, in, qpn)); + break; + case MLX5_CMD_OP_RST2INIT_QP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(rst2init_qp_in, in, qpn)); + break; + case MLX5_CMD_OP_INIT2RTR_QP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(init2rtr_qp_in, in, qpn)); + break; + case MLX5_CMD_OP_RTR2RTS_QP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(rtr2rts_qp_in, in, qpn)); + break; + case MLX5_CMD_OP_RTS2RTS_QP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(rts2rts_qp_in, in, qpn)); + break; + case MLX5_CMD_OP_SQERR2RTS_QP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(sqerr2rts_qp_in, in, qpn)); + break; + case MLX5_CMD_OP_2ERR_QP: + obj_id = 
get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(qp_2err_in, in, qpn)); + break; + case MLX5_CMD_OP_2RST_QP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(qp_2rst_in, in, qpn)); + break; + case MLX5_CMD_OP_QUERY_DCT: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, + MLX5_GET(query_dct_in, in, dctn)); + break; + case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, + MLX5_GET(query_xrq_in, in, xrqn)); + break; + case MLX5_CMD_OP_QUERY_XRC_SRQ: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ, + MLX5_GET(query_xrc_srq_in, in, + xrc_srqn)); + break; + case MLX5_CMD_OP_ARM_XRC_SRQ: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ, + MLX5_GET(arm_xrc_srq_in, in, xrc_srqn)); + break; + case MLX5_CMD_OP_QUERY_SRQ: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ, + MLX5_GET(query_srq_in, in, srqn)); + break; + case MLX5_CMD_OP_ARM_RQ: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + MLX5_GET(arm_rq_in, in, srq_number)); + break; + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, + MLX5_GET(drain_dct_in, in, dctn)); + break; + case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_RELEASE_XRQ_ERROR: + case MLX5_CMD_OP_MODIFY_XRQ: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, + MLX5_GET(arm_xrq_in, in, xrqn)); + break; + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: + obj_id = get_enc_obj_id + (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT, + MLX5_GET(query_packet_reformat_context_in, + in, packet_reformat_id)); + break; + default: + obj_id = 0; + } + + return obj_id; +} + +static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs, + struct ib_uobject *uobj, const void *in) +{ + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); + u64 obj_id = devx_get_obj_id(in); + + if (!obj_id) + return false; + + switch (uobj_get_object_id(uobj)) { + case UVERBS_OBJECT_CQ: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ, + to_mcq(uobj->object)->mcq.cqn) == + obj_id; + + case UVERBS_OBJECT_SRQ: + { + struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq); + u16 opcode; + + switch (srq->common.res) { + case MLX5_RES_XSRQ: + opcode = MLX5_CMD_OP_CREATE_XRC_SRQ; + break; + case MLX5_RES_XRQ: + opcode = MLX5_CMD_OP_CREATE_XRQ; + break; + default: + if (!dev->mdev->issi) + opcode = MLX5_CMD_OP_CREATE_SRQ; + else + opcode = MLX5_CMD_OP_CREATE_RMP; + } + + return get_enc_obj_id(opcode, + to_msrq(uobj->object)->msrq.srqn) == + obj_id; + } + + case UVERBS_OBJECT_QP: + { + struct mlx5_ib_qp *qp = to_mqp(uobj->object); + enum ib_qp_type qp_type = qp->ibqp.qp_type; + + if (qp_type == IB_QPT_RAW_PACKET || + (qp->flags & MLX5_IB_QP_UNDERLAY)) { + struct mlx5_ib_raw_packet_qp *raw_packet_qp = + &qp->raw_packet_qp; + struct mlx5_ib_rq *rq = &raw_packet_qp->rq; + struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + + return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + rq->base.mqp.qpn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ, + sq->base.mqp.qpn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR, + rq->tirn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS, + sq->tisn) == obj_id); + } + + if (qp_type == MLX5_IB_QPT_DCT) + return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, + qp->dct.mdct.dctn) == obj_id; + + return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + qp->ibqp.qp_num) == obj_id; + } + + case UVERBS_OBJECT_WQ: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + 
to_mrwq(uobj->object)->core_qp.qpn) == + obj_id; + + case UVERBS_OBJECT_RWQ_IND_TBL: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT, + to_mrwq_ind_table(uobj->object)->rqtn) == + obj_id; + + case MLX5_IB_OBJECT_DEVX_OBJ: + return ((struct devx_obj *)uobj->object)->obj_id == obj_id; + + default: + return false; + } +} + +static void devx_set_umem_valid(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_CREATE_MKEY: + MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1); + break; + case MLX5_CMD_OP_CREATE_CQ: + { + void *cqc; + + MLX5_SET(create_cq_in, in, cq_umem_valid, 1); + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, dbr_umem_valid, 1); + break; + } + case MLX5_CMD_OP_CREATE_QP: + { + void *qpc; + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, dbr_umem_valid, 1); + MLX5_SET(create_qp_in, in, wq_umem_valid, 1); + break; + } + + case MLX5_CMD_OP_CREATE_RQ: + { + void *rqc, *wq; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + MLX5_SET(wq, wq, dbr_umem_valid, 1); + MLX5_SET(wq, wq, wq_umem_valid, 1); + break; + } + + case MLX5_CMD_OP_CREATE_SQ: + { + void *sqc, *wq; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + MLX5_SET(wq, wq, dbr_umem_valid, 1); + MLX5_SET(wq, wq, wq_umem_valid, 1); + break; + } + + case MLX5_CMD_OP_MODIFY_CQ: + MLX5_SET(modify_cq_in, in, cq_umem_valid, 1); + break; + + case MLX5_CMD_OP_CREATE_RMP: + { + void *rmpc, *wq; + + rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + MLX5_SET(wq, wq, dbr_umem_valid, 1); + MLX5_SET(wq, wq, wq_umem_valid, 1); + break; + } + + case MLX5_CMD_OP_CREATE_XRQ: + { + void *xrqc, *wq; + + xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context); + wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + MLX5_SET(wq, wq, dbr_umem_valid, 1); + MLX5_SET(wq, wq, wq_umem_valid, 1); + break; + } + + case MLX5_CMD_OP_CREATE_XRC_SRQ: + { + void *xrc_srqc; + + MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1); + xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in, + xrc_srq_context_entry); + MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1); + break; + } + + default: + return; + } +} + +static bool devx_is_obj_create_cmd(const void *in, u16 *opcode) +{ + *opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (*opcode) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJ: + case MLX5_CMD_OP_CREATE_MKEY: + case MLX5_CMD_OP_CREATE_CQ: + case MLX5_CMD_OP_ALLOC_PD: + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_CREATE_RMP: + case MLX5_CMD_OP_CREATE_SQ: + case MLX5_CMD_OP_CREATE_RQ: + case MLX5_CMD_OP_CREATE_RQT: + case MLX5_CMD_OP_CREATE_TIR: + case MLX5_CMD_OP_CREATE_TIS: + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_CREATE_QP: + case MLX5_CMD_OP_CREATE_SRQ: + case MLX5_CMD_OP_CREATE_XRC_SRQ: + case MLX5_CMD_OP_CREATE_DCT: + case MLX5_CMD_OP_CREATE_XRQ: + case MLX5_CMD_OP_ATTACH_TO_MCG: + case MLX5_CMD_OP_ALLOC_XRCD: + return true; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + { + u16 op_mod = MLX5_GET(set_fte_in, in, op_mod); + if (op_mod == 0) + return true; + return false; + } + case MLX5_CMD_OP_CREATE_PSV: + { + 
u8 num_psv = MLX5_GET(create_psv_in, in, num_psv); + + if (num_psv == 1) + return true; + return false; + } + default: + return false; + } +} + +static bool devx_is_obj_modify_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_MODIFY_GENERAL_OBJ: + case MLX5_CMD_OP_MODIFY_CQ: + case MLX5_CMD_OP_MODIFY_RMP: + case MLX5_CMD_OP_MODIFY_SQ: + case MLX5_CMD_OP_MODIFY_RQ: + case MLX5_CMD_OP_MODIFY_RQT: + case MLX5_CMD_OP_MODIFY_TIR: + case MLX5_CMD_OP_MODIFY_TIS: + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_RST2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_RTR2RTS_QP: + case MLX5_CMD_OP_RTS2RTS_QP: + case MLX5_CMD_OP_SQERR2RTS_QP: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_ARM_XRC_SRQ: + case MLX5_CMD_OP_ARM_RQ: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_RELEASE_XRQ_ERROR: + case MLX5_CMD_OP_MODIFY_XRQ: + return true; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + { + u16 op_mod = MLX5_GET(set_fte_in, in, op_mod); + + if (op_mod == 1) + return true; + return false; + } + default: + return false; + } +} + +static bool devx_is_obj_query_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_QUERY_GENERAL_OBJ: + case MLX5_CMD_OP_QUERY_MKEY: + case MLX5_CMD_OP_QUERY_CQ: + case MLX5_CMD_OP_QUERY_RMP: + case MLX5_CMD_OP_QUERY_SQ: + case MLX5_CMD_OP_QUERY_RQ: + case MLX5_CMD_OP_QUERY_RQT: + case MLX5_CMD_OP_QUERY_TIR: + case MLX5_CMD_OP_QUERY_TIS: + case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_QP: + case MLX5_CMD_OP_QUERY_SRQ: + case MLX5_CMD_OP_QUERY_XRC_SRQ: + case MLX5_CMD_OP_QUERY_DCT: + case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: + return true; + default: + return false; + } +} + +static bool devx_is_whitelist_cmd(void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: + return true; + default: + return false; + } +} + +static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in) +{ + if (devx_is_whitelist_cmd(cmd_in)) { + if (c->devx_uid) + return c->devx_uid; + + return -EOPNOTSUPP; + } + + if (!c->devx_uid) + return -EINVAL; + + return c->devx_uid; +} + +static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + /* Pass all cmds for vhca_tunnel as general, tracking is done in FW */ + if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) && + MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) || + (opcode >= MLX5_CMD_OP_GENERAL_START && + opcode < MLX5_CMD_OP_GENERAL_END)) + return true; + + switch (opcode) { + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case 
MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_VPORT_STATE: + case MLX5_CMD_OP_QUERY_ADAPTER: + case MLX5_CMD_OP_QUERY_ISSI: + case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: + case MLX5_CMD_OP_QUERY_VNIC_ENV: + case MLX5_CMD_OP_QUERY_VPORT_COUNTER: + case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG: + case MLX5_CMD_OP_NOP: + case MLX5_CMD_OP_QUERY_CONG_STATUS: + case MLX5_CMD_OP_QUERY_CONG_PARAMS: + case MLX5_CMD_OP_QUERY_CONG_STATISTICS: + case MLX5_CMD_OP_QUERY_LAG: + return true; + default: + return false; + } +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c; + struct mlx5_ib_dev *dev; + int user_vector; + int dev_eqn; + unsigned int irqn; + int err; + + if (uverbs_copy_from(&user_vector, attrs, + MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC)) + return -EFAULT; + + c = devx_ufile2uctx(attrs); + if (IS_ERR(c)) + return PTR_ERR(c); + dev = to_mdev(c->ibucontext.device); + + err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn); + if (err < 0) + return err; + + if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, + &dev_eqn, sizeof(dev_eqn))) + return -EFAULT; + + return 0; +} + +/* + * Security note: + * The hardware protection mechanism works like this: Each device object that + * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in + * the device specification manual) upon its creation. Then upon doorbell, + * hardware fetches the object context for which the doorbell was rung, and + * validates that the UAR through which the DB was rung matches the UAR ID + * of the object. + * If they do not match, the doorbell is silently ignored by the hardware. Of + * course, the user cannot ring a doorbell on a UAR that was not mapped to it. + * Now in devx, as the devx kernel code does not manipulate the QP/SQ/CQ command + * mailboxes (except tagging them with UID), we expose to the user its UAR + * ID, so it can embed it in these objects in the expected specification + * format. So the only thing the user can do is harm itself by creating a + * QP/SQ/CQ with a UAR ID other than its own, and then in this case other users + * may ring a doorbell on its objects. + * The consequence of that will be that another user can schedule a QP/SQ + * of the buggy user for execution (just insert it to the hardware schedule + * queue or arm its CQ for event generation), no further harm is expected. + */
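The MLX5_IB_METHOD_DEVX_QUERY_UAR handler that follows implements exactly the model described in the note above: it only translates the caller's bfreg number into the device-level UAR index the caller may embed in its DEVX-created QP/SQ/CQ contexts. A condensed sketch of that translation with the uverbs attribute marshalling stripped away (example_user_idx_to_uar is a hypothetical name):

/*
 * A negative return from bfregn_to_uar_index() (for instance, an index
 * outside the context's bfreg allocation) is propagated unchanged,
 * matching the error handling in the real handler below.
 */
static int example_user_idx_to_uar(struct mlx5_ib_dev *dev,
    struct mlx5_ib_ucontext *c, u32 user_idx, s32 *dev_idx)
{
	*dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
	return (*dev_idx < 0) ? (int)*dev_idx : 0;
}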
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)( + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c; + struct mlx5_ib_dev *dev; + u32 user_idx; + s32 dev_idx; + + c = devx_ufile2uctx(attrs); + if (IS_ERR(c)) + return PTR_ERR(c); + dev = to_mdev(c->ibucontext.device); + + if (uverbs_copy_from(&user_idx, attrs, + MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX)) + return -EFAULT; + + dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true); + if (dev_idx < 0) + return dev_idx; + + if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, + &dev_idx, sizeof(dev_idx))) + return -EFAULT; + + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c; + struct mlx5_ib_dev *dev; + void *cmd_in = uverbs_attr_get_alloced_ptr( + attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT); + void *cmd_out; + int err; + int uid; + + c = devx_ufile2uctx(attrs); + if (IS_ERR(c)) + return PTR_ERR(c); + dev = to_mdev(c->ibucontext.device); + + uid = devx_get_uid(c, cmd_in); + if (uid < 0) + return uid; + + /* Only a whitelist of general HCA commands is allowed for this method. */ + if (!devx_is_general_cmd(cmd_in, dev)) + return -EINVAL; + + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), + cmd_out, cmd_out_len); + if (err) + return err; + + return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, + cmd_out_len); +} + +static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, + u32 *dinlen, + u32 *obj_id) +{ + u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type); + u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid); + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid); + + switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJ); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); + break; + + case MLX5_CMD_OP_CREATE_UMEM: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_UMEM); + break; + case MLX5_CMD_OP_CREATE_MKEY: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); + break; + case MLX5_CMD_OP_CREATE_CQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + break; + case MLX5_CMD_OP_ALLOC_PD: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + break; + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + break; + case MLX5_CMD_OP_CREATE_RMP: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + break; + case MLX5_CMD_OP_CREATE_SQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + break; + case MLX5_CMD_OP_CREATE_RQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + break; + case MLX5_CMD_OP_CREATE_RQT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + break; + case MLX5_CMD_OP_CREATE_TIR: + MLX5_SET(general_obj_in_cmd_hdr, din,
opcode, MLX5_CMD_OP_DESTROY_TIR); + break; + case MLX5_CMD_OP_CREATE_TIS: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + break; + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + break; + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in); + *obj_id = MLX5_GET(create_flow_table_out, out, table_id); + MLX5_SET(destroy_flow_table_in, din, other_vport, + MLX5_GET(create_flow_table_in, in, other_vport)); + MLX5_SET(destroy_flow_table_in, din, vport_number, + MLX5_GET(create_flow_table_in, in, vport_number)); + MLX5_SET(destroy_flow_table_in, din, table_type, + MLX5_GET(create_flow_table_in, in, table_type)); + MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + break; + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in); + *obj_id = MLX5_GET(create_flow_group_out, out, group_id); + MLX5_SET(destroy_flow_group_in, din, other_vport, + MLX5_GET(create_flow_group_in, in, other_vport)); + MLX5_SET(destroy_flow_group_in, din, vport_number, + MLX5_GET(create_flow_group_in, in, vport_number)); + MLX5_SET(destroy_flow_group_in, din, table_type, + MLX5_GET(create_flow_group_in, in, table_type)); + MLX5_SET(destroy_flow_group_in, din, table_id, + MLX5_GET(create_flow_group_in, in, table_id)); + MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + break; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in); + *obj_id = MLX5_GET(set_fte_in, in, flow_index); + MLX5_SET(delete_fte_in, din, other_vport, + MLX5_GET(set_fte_in, in, other_vport)); + MLX5_SET(delete_fte_in, din, vport_number, + MLX5_GET(set_fte_in, in, vport_number)); + MLX5_SET(delete_fte_in, din, table_type, + MLX5_GET(set_fte_in, in, table_type)); + MLX5_SET(delete_fte_in, din, table_id, + MLX5_GET(set_fte_in, in, table_id)); + MLX5_SET(delete_fte_in, din, flow_index, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + break; + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + break; + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + break; + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + break; + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in); + *obj_id = MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + MLX5_SET(destroy_scheduling_element_in, din, + scheduling_hierarchy, + MLX5_GET(create_scheduling_element_in, in, + scheduling_hierarchy)); + MLX5_SET(destroy_scheduling_element_in, din, + scheduling_element_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); + break; + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in); + *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); + break; + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: 
+ *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in); + *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); + MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + break; + case MLX5_CMD_OP_CREATE_QP: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP); + break; + case MLX5_CMD_OP_CREATE_SRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + break; + case MLX5_CMD_OP_CREATE_XRC_SRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + break; + case MLX5_CMD_OP_CREATE_DCT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + break; + case MLX5_CMD_OP_CREATE_XRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + break; + case MLX5_CMD_OP_ATTACH_TO_MCG: + *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in); + MLX5_SET(detach_from_mcg_in, din, qpn, + MLX5_GET(attach_to_mcg_in, in, qpn)); + memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid), + MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid), + MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid)); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + break; + case MLX5_CMD_OP_ALLOC_XRCD: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + break; + case MLX5_CMD_OP_CREATE_PSV: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_PSV); + MLX5_SET(destroy_psv_in, din, psvn, + MLX5_GET(create_psv_out, out, psv0_index)); + break; + default: + /* The entry must match to one of the devx_is_obj_create_cmd */ + WARN_ON(true); + break; + } +} + +static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, + struct devx_obj *obj, + void *in, int in_len) +{ + int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) + + MLX5_FLD_SZ_BYTES(create_mkey_in, + memory_key_mkey_entry); + void *mkc; + u8 access_mode; + + if (in_len < min_len) + return -EINVAL; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + access_mode = MLX5_GET(mkc, mkc, access_mode); + access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2; + + if (access_mode == MLX5_ACCESS_MODE_KLM || + access_mode == MLX5_ACCESS_MODE_KSM) { + return 0; + } + + MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1); + return 0; +} + +static void devx_cleanup_subscription(struct mlx5_ib_dev *dev, + struct devx_event_subscription *sub) +{ + struct devx_event *event; + struct devx_obj_event *xa_val_level2; + + if (sub->is_cleaned) + return; + + sub->is_cleaned = 1; + list_del_rcu(&sub->xa_list); + + if (list_empty(&sub->obj_list)) + return; + + list_del_rcu(&sub->obj_list); + /* check whether key level 1 for this obj_sub_list is empty */ + event = xa_load(&dev->devx_event_table.event_xa, + sub->xa_key_level1); + WARN_ON(!event); + + xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2); + if (list_empty(&xa_val_level2->obj_sub_list)) { + xa_erase(&event->object_ids, + sub->xa_key_level2); + kfree_rcu(xa_val_level2, rcu); + } +} + +static int devx_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + struct mlx5_devx_event_table *devx_event_table; + struct devx_obj *obj = uobject->object; + struct devx_event_subscription *sub_entry, *tmp; + struct mlx5_ib_dev *dev; + int ret; + + dev = mlx5_udata_to_mdev(&attrs->driver_udata); + if (obj->flags & DEVX_OBJ_FLAGS_DCT) + ret = 
mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); + else if (obj->flags & DEVX_OBJ_FLAGS_CQ) + ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq); + else + ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, + obj->dinlen, out, sizeof(out)); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + devx_event_table = &dev->devx_event_table; + + mutex_lock(&devx_event_table->event_xa_lock); + list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list) + devx_cleanup_subscription(dev, sub_entry); + mutex_unlock(&devx_event_table->event_xa_lock); + + kfree(obj); + return ret; +} + +static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) +{ + struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq); + struct mlx5_devx_event_table *table; + struct devx_event *event; + struct devx_obj_event *obj_event; + u32 obj_id = mcq->cqn; + + table = &obj->ib_dev->devx_event_table; + rcu_read_lock(); + event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP); + if (!event) + goto out; + + obj_event = xa_load(&event->object_ids, obj_id); + if (!obj_event) + goto out; + + dispatch_event_fd(&obj_event->obj_sub_list, eqe); +out: + rcu_read_unlock(); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( + struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT); + int cmd_in_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); + void *cmd_out; + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + struct devx_obj *obj; + u16 obj_type = 0; + int err; + int uid; + u32 obj_id; + u16 opcode; + + if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id)) + return -EINVAL; + + uid = devx_get_uid(c, cmd_in); + if (uid < 0) + return uid; + + if (!devx_is_obj_create_cmd(cmd_in, &opcode)) + return -EINVAL; + + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + + obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); + if (opcode == MLX5_CMD_OP_CREATE_MKEY) { + err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len); + if (err) + goto obj_free; + } else { + devx_set_umem_valid(cmd_in); + } + + if (opcode == MLX5_CMD_OP_CREATE_DCT) { + obj->flags |= DEVX_OBJ_FLAGS_DCT; + err = mlx5_core_create_dct(dev->mdev, &obj->core_dct, + cmd_in, cmd_in_len, + cmd_out, cmd_out_len); + } else if (opcode == MLX5_CMD_OP_CREATE_CQ) { + obj->flags |= DEVX_OBJ_FLAGS_CQ; + obj->core_cq.comp = devx_cq_comp; + err = mlx5_core_create_cq(dev->mdev, &obj->core_cq, + cmd_in, cmd_in_len, cmd_out, + cmd_out_len); + } else { + err = mlx5_cmd_exec(dev->mdev, cmd_in, + cmd_in_len, + cmd_out, cmd_out_len); + } + + if (err) + goto obj_free; + + if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) { + u8 bulk = MLX5_GET(alloc_flow_counter_in, + cmd_in, + flow_counter_bulk); + obj->flow_counter_bulk_size = 128UL * bulk; + } + + uobj->object = obj; + INIT_LIST_HEAD(&obj->event_sub); + obj->ib_dev = dev; + devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, + &obj_id); + WARN_ON(obj->dinlen > 
MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); + if (err) + goto obj_destroy; + + if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJ) + obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type); + obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id); + + return 0; + +obj_destroy: + if (obj->flags & DEVX_OBJ_FLAGS_DCT) + mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); + else if (obj->flags & DEVX_OBJ_FLAGS_CQ) + mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq); + else + mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out, + sizeof(out)); +obj_free: + kfree(obj); + return err; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( + struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT); + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE); + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device); + void *cmd_out; + int err; + int uid; + + if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id)) + return -EINVAL; + + uid = devx_get_uid(c, cmd_in); + if (uid < 0) + return uid; + + if (!devx_is_obj_modify_cmd(cmd_in)) + return -EINVAL; + + if (!devx_is_valid_obj_id(attrs, uobj, cmd_in)) + return -EINVAL; + + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); + devx_set_umem_valid(cmd_in); + + err = mlx5_cmd_exec(mdev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), + cmd_out, cmd_out_len); + if (err) + return err; + + return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + cmd_out, cmd_out_len); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( + struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT); + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE); + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + void *cmd_out; + int err; + int uid; + struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device); + + if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id)) + return -EINVAL; + + uid = devx_get_uid(c, cmd_in); + if (uid < 0) + return uid; + + if (!devx_is_obj_query_cmd(cmd_in)) + return -EINVAL; + + if (!devx_is_valid_obj_id(attrs, uobj, cmd_in)) + return -EINVAL; + + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); + err = mlx5_cmd_exec(mdev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), + cmd_out, cmd_out_len); + if (err) + return err; + + return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + cmd_out, cmd_out_len); +} + +struct devx_async_event_queue { + spinlock_t lock; + wait_queue_head_t poll_wait; + struct list_head event_list; + atomic_t bytes_in_use; + u8 is_destroyed:1; +}; + +struct devx_async_cmd_event_file { + struct ib_uobject uobj; + struct 
devx_async_event_queue ev_queue;
+	struct mlx5_async_ctx async_ctx;
+};
+
+static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
+{
+	spin_lock_init(&ev_queue->lock);
+	INIT_LIST_HEAD(&ev_queue->event_list);
+	init_waitqueue_head(&ev_queue->poll_wait);
+	atomic_set(&ev_queue->bytes_in_use, 0);
+	ev_queue->is_destroyed = 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct devx_async_cmd_event_file *ev_file;
+
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
+	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+
+	ev_file = container_of(uobj, struct devx_async_cmd_event_file,
+			       uobj);
+	devx_init_event_queue(&ev_file->ev_queue);
+	mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx);
+	return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE);
+	struct devx_async_event_file *ev_file;
+	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+	u32 flags;
+	int err;
+
+	err = uverbs_get_flags32(&flags, attrs,
+		MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+		MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);
+
+	if (err)
+		return err;
+
+	ev_file = container_of(uobj, struct devx_async_event_file,
+			       uobj);
+	spin_lock_init(&ev_file->lock);
+	INIT_LIST_HEAD(&ev_file->event_list);
+	init_waitqueue_head(&ev_file->poll_wait);
+	if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA)
+		ev_file->omit_data = 1;
+	INIT_LIST_HEAD(&ev_file->subscribed_events_list);
+	ev_file->dev = dev;
+	get_device(&dev->ib_dev.dev);
+	return 0;
+}
+
+static void devx_query_callback(int status, struct mlx5_async_work *context)
+{
+	struct devx_async_data *async_data =
+		container_of(context, struct devx_async_data, cb_work);
+	struct devx_async_cmd_event_file *ev_file = async_data->ev_file;
+	struct devx_async_event_queue *ev_queue = &ev_file->ev_queue;
+	unsigned long flags;
+
+	/*
+	 * Note that if the struct devx_async_cmd_event_file uobj begins to be
+	 * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this
+	 * routine returns, ensuring that it always remains valid here.
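+	 * The ev_queue lock is taken irqsave below because this completion
+	 * callback may be invoked from the command-EQ interrupt path.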
+ */ + spin_lock_irqsave(&ev_queue->lock, flags); + list_add_tail(&async_data->list, &ev_queue->event_list); + spin_unlock_irqrestore(&ev_queue->lock, flags); + + wake_up_interruptible(&ev_queue->poll_wait); +} + +#define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */ + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)( + struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN); + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE); + u16 cmd_out_len; + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct ib_uobject *fd_uobj; + int err; + int uid; + struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device); + struct devx_async_cmd_event_file *ev_file; + struct devx_async_data *async_data; + + if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id)) + return -EINVAL; + + uid = devx_get_uid(c, cmd_in); + if (uid < 0) + return uid; + + if (!devx_is_obj_query_cmd(cmd_in)) + return -EINVAL; + + err = uverbs_get_const(&cmd_out_len, attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN); + if (err) + return err; + + if (!devx_is_valid_obj_id(attrs, uobj, cmd_in)) + return -EINVAL; + + fd_uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD); + if (IS_ERR(fd_uobj)) + return PTR_ERR(fd_uobj); + + ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file, + uobj); + + if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) > + MAX_ASYNC_BYTES_IN_USE) { + atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use); + return -EAGAIN; + } + + async_data = kvzalloc(struct_size(async_data, hdr.out_data, + cmd_out_len), GFP_KERNEL); + if (!async_data) { + err = -ENOMEM; + goto sub_bytes; + } + + err = uverbs_copy_from(&async_data->hdr.wr_id, attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID); + if (err) + goto free_async; + + async_data->cmd_out_len = cmd_out_len; + async_data->mdev = mdev; + async_data->ev_file = ev_file; + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); + err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in, + uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN), + async_data->hdr.out_data, + async_data->cmd_out_len, + devx_query_callback, &async_data->cb_work); + + if (err) + goto free_async; + + return 0; + +free_async: + kvfree(async_data); +sub_bytes: + atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use); + return err; +} + +static void +subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table, + u32 key_level1, + bool is_level2, + u32 key_level2) +{ + struct devx_event *event; + struct devx_obj_event *xa_val_level2; + + /* Level 1 is valid for future use, no need to free */ + if (!is_level2) + return; + + event = xa_load(&devx_event_table->event_xa, key_level1); + WARN_ON(!event); + + xa_val_level2 = xa_load(&event->object_ids, + key_level2); + if (list_empty(&xa_val_level2->obj_sub_list)) { + xa_erase(&event->object_ids, + key_level2); + kfree_rcu(xa_val_level2, rcu); + } +} + +static int +subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, + u32 key_level1, + bool is_level2, + u32 key_level2) +{ + struct devx_obj_event *obj_event; + struct devx_event *event; + int err; + + event = xa_load(&devx_event_table->event_xa, key_level1); + if (!event) { + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return -ENOMEM; + + 
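+		/* First subscriber for this event/object type: initialize
+		 * the level-1 entry's lists before publishing it in the XA.
+		 */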
INIT_LIST_HEAD(&event->unaffiliated_list); + xa_init_flags(&event->object_ids, 0); + + err = xa_insert(&devx_event_table->event_xa, + key_level1, + event, + GFP_KERNEL); + if (err) { + kfree(event); + return err; + } + } + + if (!is_level2) + return 0; + + obj_event = xa_load(&event->object_ids, key_level2); + if (!obj_event) { + obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL); + if (!obj_event) + /* Level1 is valid for future use, no need to free */ + return -ENOMEM; + + err = xa_insert(&event->object_ids, + key_level2, + obj_event, + GFP_KERNEL); + if (err) + return err; + INIT_LIST_HEAD(&obj_event->obj_sub_list); + } + + return 0; +} + +static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list, + struct devx_obj *obj) +{ + int i; + + for (i = 0; i < num_events; i++) { + if (obj) { + if (!is_legacy_obj_event_num(event_type_num_list[i])) + return false; + } else if (!is_legacy_unaffiliated_event_num( + event_type_num_list[i])) { + return false; + } + } + + return true; +} + +#define MAX_SUPP_EVENT_NUM 255 +static bool is_valid_events(struct mlx5_core_dev *dev, + int num_events, u16 *event_type_num_list, + struct devx_obj *obj) +{ + __be64 *aff_events; + __be64 *unaff_events; + int mask_entry; + int mask_bit; + int i; + + if (MLX5_CAP_GEN(dev, event_cap)) { + aff_events = (__be64 *)MLX5_CAP_DEV_EVENT(dev, + user_affiliated_events); + unaff_events = (__be64 *)MLX5_CAP_DEV_EVENT(dev, + user_unaffiliated_events); + } else { + return is_valid_events_legacy(num_events, event_type_num_list, + obj); + } + + for (i = 0; i < num_events; i++) { + if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM) + return false; + + mask_entry = event_type_num_list[i] / 64; + mask_bit = event_type_num_list[i] % 64; + + if (obj) { + /* CQ completion */ + if (event_type_num_list[i] == 0) + continue; + + if (!(be64_to_cpu(aff_events[mask_entry]) & + (1ull << mask_bit))) + return false; + + continue; + } + + if (!(be64_to_cpu(unaff_events[mask_entry]) & + (1ull << mask_bit))) + return false; + } + + return true; +} + +#define MAX_NUM_EVENTS 16 +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *devx_uobj = uverbs_attr_get_uobject( + attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE); + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + struct ib_uobject *fd_uobj; + struct devx_obj *obj = NULL; + struct devx_async_event_file *ev_file; + struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table; + u16 *event_type_num_list; + struct devx_event_subscription *event_sub, *tmp_sub; + struct list_head sub_list; + int redirect_fd; + bool use_eventfd = false; + int num_events; + int num_alloc_xa_entries = 0; + u16 obj_type = 0; + u64 cookie = 0; + u32 obj_id = 0; + int err; + int i; + + if (!c->devx_uid) + return -EINVAL; + + if (!IS_ERR(devx_uobj)) { + obj = (struct devx_obj *)devx_uobj->object; + if (obj) + obj_id = get_dec_obj_id(obj->obj_id); + } + + fd_uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE); + if (IS_ERR(fd_uobj)) + return PTR_ERR(fd_uobj); + + ev_file = container_of(fd_uobj, struct devx_async_event_file, + uobj); + + if (uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) { + err = uverbs_copy_from(&redirect_fd, attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM); + if (err) + return err; + + use_eventfd = true; + } + + if 
(uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) { + if (use_eventfd) + return -EINVAL; + + err = uverbs_copy_from(&cookie, attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE); + if (err) + return err; + } + + num_events = uverbs_attr_ptr_get_array_size( + attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST, + sizeof(u16)); + + if (num_events < 0) + return num_events; + + if (num_events > MAX_NUM_EVENTS) + return -EINVAL; + + event_type_num_list = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST); + + if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj)) + return -EINVAL; + + INIT_LIST_HEAD(&sub_list); + + /* Protect from concurrent subscriptions to same XA entries to allow + * both to succeed + */ + mutex_lock(&devx_event_table->event_xa_lock); + for (i = 0; i < num_events; i++) { + u32 key_level1; + + if (obj) + obj_type = get_dec_obj_type(obj, + event_type_num_list[i]); + key_level1 = event_type_num_list[i] | obj_type << 16; + + err = subscribe_event_xa_alloc(devx_event_table, + key_level1, + obj, + obj_id); + if (err) + goto err; + + num_alloc_xa_entries++; + event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL); + if (!event_sub) + goto err; + + list_add_tail(&event_sub->event_list, &sub_list); + uverbs_uobject_get(&ev_file->uobj); + if (use_eventfd) { + event_sub->eventfd = + fdget(redirect_fd); + + if (event_sub->eventfd.file == NULL) { + err = -EBADF; + goto err; + } + } + + event_sub->cookie = cookie; + event_sub->ev_file = ev_file; + /* May be needed upon cleanup the devx object/subscription */ + event_sub->xa_key_level1 = key_level1; + event_sub->xa_key_level2 = obj_id; + INIT_LIST_HEAD(&event_sub->obj_list); + } + + /* Once all the allocations and the XA data insertions were done we + * can go ahead and add all the subscriptions to the relevant lists + * without concern of a failure. 
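+	 * The subscription lists are traversed under RCU on the event
+	 * dispatch path, which is why the inserts below use
+	 * list_add_tail_rcu().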
+ */ + list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) { + struct devx_event *event; + struct devx_obj_event *obj_event; + + list_del_init(&event_sub->event_list); + + spin_lock_irq(&ev_file->lock); + list_add_tail_rcu(&event_sub->file_list, + &ev_file->subscribed_events_list); + spin_unlock_irq(&ev_file->lock); + + event = xa_load(&devx_event_table->event_xa, + event_sub->xa_key_level1); + WARN_ON(!event); + + if (!obj) { + list_add_tail_rcu(&event_sub->xa_list, + &event->unaffiliated_list); + continue; + } + + obj_event = xa_load(&event->object_ids, obj_id); + WARN_ON(!obj_event); + list_add_tail_rcu(&event_sub->xa_list, + &obj_event->obj_sub_list); + list_add_tail_rcu(&event_sub->obj_list, + &obj->event_sub); + } + + mutex_unlock(&devx_event_table->event_xa_lock); + return 0; + +err: + list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) { + list_del(&event_sub->event_list); + + subscribe_event_xa_dealloc(devx_event_table, + event_sub->xa_key_level1, + obj, + obj_id); + + if (event_sub->eventfd.file) + fdput(event_sub->eventfd); + uverbs_uobject_put(&event_sub->ev_file->uobj); + kfree(event_sub); + } + + mutex_unlock(&devx_event_table->event_xa_lock); + return err; +} + +static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, + struct uverbs_attr_bundle *attrs, + struct devx_umem *obj) +{ + u64 addr; + size_t size; + u32 access; + int npages; + int err; + u32 page_mask; + + if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || + uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) + return -EFAULT; + + err = uverbs_get_flags32(&access, attrs, + MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); + if (err) + return err; + + err = ib_check_mr_access(access); + if (err) + return err; + + obj->umem = ib_umem_get(ucontext, addr, size, access, 0); + if (IS_ERR(obj->umem)) + return PTR_ERR(obj->umem); + + mlx5_ib_cont_pages(obj->umem, obj->umem->address, + MLX5_MKEY_PAGE_SHIFT_MASK, &npages, + &obj->page_shift, &obj->ncont, NULL); + + if (!npages) { + ib_umem_release(obj->umem); + return -EINVAL; + } + + page_mask = (1 << obj->page_shift) - 1; + obj->page_offset = obj->umem->address & page_mask; + + return 0; +} + +static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs, + struct devx_umem *obj, + struct devx_umem_reg_cmd *cmd) +{ + cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + + (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); + cmd->in = uverbs_zalloc(attrs, cmd->inlen); + return PTR_ERR_OR_ZERO(cmd->in); +} + +static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, + struct devx_umem *obj, + struct devx_umem_reg_cmd *cmd) +{ + void *umem; + __be64 *mtt; + + umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); + mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); + + MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); + MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); + MLX5_SET(umem, umem, log_page_size, obj->page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(umem, umem, page_offset, obj->page_offset); + mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt, + (obj->umem->writable ? 
MLX5_IB_MTT_WRITE : 0) | + MLX5_IB_MTT_READ); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( + struct uverbs_attr_bundle *attrs) +{ + struct devx_umem_reg_cmd cmd; + struct devx_umem *obj; + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE); + u32 obj_id; + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + int err; + + if (!c->devx_uid) + return -EINVAL; + + obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + err = devx_umem_get(dev, &c->ibucontext, attrs, obj); + if (err) + goto err_obj_free; + + err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd); + if (err) + goto err_umem_release; + + devx_umem_reg_cmd_build(dev, obj, &cmd); + + MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, + sizeof(cmd.out)); + if (err) + goto err_umem_release; + + obj->mdev = dev->mdev; + uobj->object = obj; + devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id); + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id)); + if (err) + goto err_umem_destroy; + + return 0; + +err_umem_destroy: + mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out)); +err_umem_release: + ib_umem_release(obj->umem); +err_obj_free: + kfree(obj); + return err; +} + +static int devx_umem_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct devx_umem *obj = uobject->object; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + int err; + + err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (ib_is_destroy_retryable(err, why, uobject)) + return err; + + ib_umem_release(obj->umem); + kfree(obj); + return 0; +} + +static bool is_unaffiliated_event(struct mlx5_core_dev *dev, + unsigned long event_type) +{ + __be64 *unaff_events; + int mask_entry; + int mask_bit; + + if (!MLX5_CAP_GEN(dev, event_cap)) + return is_legacy_unaffiliated_event_num(event_type); + + unaff_events = (__be64 *)MLX5_CAP_DEV_EVENT(dev, + user_unaffiliated_events); + WARN_ON(event_type > MAX_SUPP_EVENT_NUM); + + mask_entry = event_type / 64; + mask_bit = event_type % 64; + + if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit))) + return false; + + return true; +} + +static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data) +{ + struct mlx5_eqe *eqe = data; + u32 obj_id = 0; + + switch (event_type) { + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + break; + case MLX5_EVENT_TYPE_XRQ_ERROR: + obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff; + break; + case MLX5_EVENT_TYPE_DCT_DRAINED: + case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION: + obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; + break; + case MLX5_EVENT_TYPE_CQ_ERROR: + obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; + break; + default: + obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id); + break; + } + + return obj_id; +} + 
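+/*
+ * Deliver a single event to one subscriber. In OMIT_DATA mode only the
+ * subscription itself is queued (userspace reads back just the cookie,
+ * and an already-queued subscription is not queued twice). Otherwise a
+ * devx_async_event_data carrying the raw EQE is allocated with GFP_ATOMIC;
+ * if that allocation fails, is_overflow_err is set and the next read on
+ * the file returns -EOVERFLOW.
+ */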
+static int deliver_event(struct devx_event_subscription *event_sub, + const void *data) +{ + struct devx_async_event_file *ev_file; + struct devx_async_event_data *event_data; + unsigned long flags; + + ev_file = event_sub->ev_file; + + if (ev_file->omit_data) { + spin_lock_irqsave(&ev_file->lock, flags); + if (!list_empty(&event_sub->event_list) || + ev_file->is_destroyed) { + spin_unlock_irqrestore(&ev_file->lock, flags); + return 0; + } + + list_add_tail(&event_sub->event_list, &ev_file->event_list); + spin_unlock_irqrestore(&ev_file->lock, flags); + wake_up_interruptible(&ev_file->poll_wait); + return 0; + } + + event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe), + GFP_ATOMIC); + if (!event_data) { + spin_lock_irqsave(&ev_file->lock, flags); + ev_file->is_overflow_err = 1; + spin_unlock_irqrestore(&ev_file->lock, flags); + return -ENOMEM; + } + + event_data->hdr.cookie = event_sub->cookie; + memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe)); + + spin_lock_irqsave(&ev_file->lock, flags); + if (!ev_file->is_destroyed) + list_add_tail(&event_data->list, &ev_file->event_list); + else + kfree(event_data); + spin_unlock_irqrestore(&ev_file->lock, flags); + wake_up_interruptible(&ev_file->poll_wait); + + return 0; +} + +static void dispatch_event_fd(struct list_head *fd_list, + const void *data) +{ + struct devx_event_subscription *item; + + list_for_each_entry_rcu(item, fd_list, xa_list) { + if (item->eventfd.file != NULL) + linux_poll_wakeup(item->eventfd.file); + else + deliver_event(item, data); + } +} + +static bool mlx5_devx_event_notifier(struct mlx5_core_dev *mdev, + uint8_t event_type, void *data) +{ + struct mlx5_ib_dev *dev; + struct mlx5_devx_event_table *table; + struct devx_event *event; + struct devx_obj_event *obj_event; + u16 obj_type = 0; + bool is_unaffiliated; + u32 obj_id; + + /* Explicit filtering to kernel events which may occur frequently */ + if (event_type == MLX5_EVENT_TYPE_CMD || + event_type == MLX5_EVENT_TYPE_PAGE_REQUEST) + return true; + + dev = mdev->priv.eq_table.dev; + table = &dev->devx_event_table; + is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type); + + if (!is_unaffiliated) + obj_type = get_event_obj_type(event_type, data); + + rcu_read_lock(); + event = xa_load(&table->event_xa, event_type | (obj_type << 16)); + if (!event) { + rcu_read_unlock(); + return false; + } + + if (is_unaffiliated) { + dispatch_event_fd(&event->unaffiliated_list, data); + rcu_read_unlock(); + return true; + } + + obj_id = devx_get_obj_id_from_event(event_type, data); + obj_event = xa_load(&event->object_ids, obj_id); + if (!obj_event) { + rcu_read_unlock(); + return false; + } + + dispatch_event_fd(&obj_event->obj_sub_list, data); + + rcu_read_unlock(); + return true; +} + +void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_devx_event_table *table = &dev->devx_event_table; + + xa_init_flags(&table->event_xa, 0); + mutex_init(&table->event_xa_lock); + dev->mdev->priv.eq_table.dev = dev; + dev->mdev->priv.eq_table.cb = mlx5_devx_event_notifier; +} + +void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_devx_event_table *table = &dev->devx_event_table; + struct devx_event_subscription *sub, *tmp; + struct devx_event *event; + void *entry; + unsigned long id; + + dev->mdev->priv.eq_table.cb = NULL; + dev->mdev->priv.eq_table.dev = NULL; + mutex_lock(&dev->devx_event_table.event_xa_lock); + xa_for_each(&table->event_xa, id, entry) { + event = entry; + list_for_each_entry_safe(sub, tmp, 
&event->unaffiliated_list, + xa_list) + devx_cleanup_subscription(dev, sub); + kfree(entry); + } + mutex_unlock(&dev->devx_event_table.event_xa_lock); + xa_destroy(&table->event_xa); +} + +static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct devx_async_cmd_event_file *comp_ev_file = filp->private_data; + struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + struct devx_async_data *event; + int ret = 0; + size_t eventsz; + + spin_lock_irq(&ev_queue->lock); + + while (list_empty(&ev_queue->event_list)) { + spin_unlock_irq(&ev_queue->lock); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible( + ev_queue->poll_wait, + (!list_empty(&ev_queue->event_list) || + ev_queue->is_destroyed))) { + return -ERESTARTSYS; + } + + spin_lock_irq(&ev_queue->lock); + if (ev_queue->is_destroyed) { + spin_unlock_irq(&ev_queue->lock); + return -EIO; + } + } + + event = list_entry(ev_queue->event_list.next, + struct devx_async_data, list); + eventsz = event->cmd_out_len + + sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr); + + if (eventsz > count) { + spin_unlock_irq(&ev_queue->lock); + return -ENOSPC; + } + + list_del(ev_queue->event_list.next); + spin_unlock_irq(&ev_queue->lock); + + if (copy_to_user(buf, &event->hdr, eventsz)) + ret = -EFAULT; + else + ret = eventsz; + + atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use); + kvfree(event); + return ret; +} + +static __poll_t devx_async_cmd_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct devx_async_cmd_event_file *comp_ev_file = filp->private_data; + struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + __poll_t pollflags = 0; + + poll_wait(filp, &ev_queue->poll_wait, wait); + + spin_lock_irq(&ev_queue->lock); + if (ev_queue->is_destroyed) + pollflags = POLLIN | POLLRDNORM | POLLHUP; + else if (!list_empty(&ev_queue->event_list)) + pollflags = POLLIN | POLLRDNORM; + spin_unlock_irq(&ev_queue->lock); + + return pollflags; +} + +static const struct file_operations devx_async_cmd_event_fops = { + .owner = THIS_MODULE, + .read = devx_async_cmd_event_read, + .poll = devx_async_cmd_event_poll, + .release = uverbs_uobject_fd_release, + .llseek = no_llseek, +}; + +static ssize_t devx_async_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct devx_async_event_file *ev_file = filp->private_data; + struct devx_event_subscription *event_sub; + struct devx_async_event_data *uninitialized_var(event); + int ret = 0; + size_t eventsz; + bool omit_data; + void *event_data; + + omit_data = ev_file->omit_data; + + spin_lock_irq(&ev_file->lock); + + if (ev_file->is_overflow_err) { + ev_file->is_overflow_err = 0; + spin_unlock_irq(&ev_file->lock); + return -EOVERFLOW; + } + + + while (list_empty(&ev_file->event_list)) { + spin_unlock_irq(&ev_file->lock); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(ev_file->poll_wait, + (!list_empty(&ev_file->event_list) || + ev_file->is_destroyed))) { + return -ERESTARTSYS; + } + + spin_lock_irq(&ev_file->lock); + if (ev_file->is_destroyed) { + spin_unlock_irq(&ev_file->lock); + return -EIO; + } + } + + if (omit_data) { + event_sub = list_first_entry(&ev_file->event_list, + struct devx_event_subscription, + event_list); + eventsz = sizeof(event_sub->cookie); + event_data = &event_sub->cookie; + } else { + event = list_first_entry(&ev_file->event_list, + struct devx_async_event_data, list); + eventsz = sizeof(struct 
mlx5_eqe) + + sizeof(struct mlx5_ib_uapi_devx_async_event_hdr); + event_data = &event->hdr; + } + + if (eventsz > count) { + spin_unlock_irq(&ev_file->lock); + return -EINVAL; + } + + if (omit_data) + list_del_init(&event_sub->event_list); + else + list_del(&event->list); + + spin_unlock_irq(&ev_file->lock); + + if (copy_to_user(buf, event_data, eventsz)) + /* This points to an application issue, not a kernel concern */ + ret = -EFAULT; + else + ret = eventsz; + + if (!omit_data) + kfree(event); + return ret; +} + +static __poll_t devx_async_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct devx_async_event_file *ev_file = filp->private_data; + __poll_t pollflags = 0; + + poll_wait(filp, &ev_file->poll_wait, wait); + + spin_lock_irq(&ev_file->lock); + if (ev_file->is_destroyed) + pollflags = POLLIN | POLLRDNORM | POLLHUP; + else if (!list_empty(&ev_file->event_list)) + pollflags = POLLIN | POLLRDNORM; + spin_unlock_irq(&ev_file->lock); + + return pollflags; +} + +static void devx_free_subscription(struct rcu_head *rcu) +{ + struct devx_event_subscription *event_sub = + container_of(rcu, struct devx_event_subscription, rcu); + + if (event_sub->eventfd.file) + fdput(event_sub->eventfd); + uverbs_uobject_put(&event_sub->ev_file->uobj); + kfree(event_sub); +} + +static const struct file_operations devx_async_event_fops = { + .owner = THIS_MODULE, + .read = devx_async_event_read, + .poll = devx_async_event_poll, + .release = uverbs_uobject_fd_release, + .llseek = no_llseek, +}; + +static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + struct devx_async_cmd_event_file *comp_ev_file = + container_of(uobj, struct devx_async_cmd_event_file, + uobj); + struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + struct devx_async_data *entry, *tmp; + + spin_lock_irq(&ev_queue->lock); + ev_queue->is_destroyed = 1; + spin_unlock_irq(&ev_queue->lock); + wake_up_interruptible(&ev_queue->poll_wait); + + mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx); + + spin_lock_irq(&comp_ev_file->ev_queue.lock); + list_for_each_entry_safe(entry, tmp, + &comp_ev_file->ev_queue.event_list, list) { + list_del(&entry->list); + kvfree(entry); + } + spin_unlock_irq(&comp_ev_file->ev_queue.lock); + return 0; +}; + +static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + struct devx_async_event_file *ev_file = + container_of(uobj, struct devx_async_event_file, + uobj); + struct devx_event_subscription *event_sub, *event_sub_tmp; + struct mlx5_ib_dev *dev = ev_file->dev; + + spin_lock_irq(&ev_file->lock); + ev_file->is_destroyed = 1; + + /* free the pending events allocation */ + if (ev_file->omit_data) { + struct devx_event_subscription *event_sub, *tmp; + + list_for_each_entry_safe(event_sub, tmp, &ev_file->event_list, + event_list) + list_del_init(&event_sub->event_list); + + } else { + struct devx_async_event_data *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &ev_file->event_list, + list) { + list_del(&entry->list); + kfree(entry); + } + } + + spin_unlock_irq(&ev_file->lock); + wake_up_interruptible(&ev_file->poll_wait); + + mutex_lock(&dev->devx_event_table.event_xa_lock); + /* delete the subscriptions which are related to this FD */ + list_for_each_entry_safe(event_sub, event_sub_tmp, + &ev_file->subscribed_events_list, file_list) { + devx_cleanup_subscription(dev, event_sub); + list_del_rcu(&event_sub->file_list); + /* subscription may not be used by the read API any more */ + 
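+		/* devx_free_subscription() drops the eventfd and uobject
+		 * references once the RCU grace period has elapsed.
+		 */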
call_rcu(&event_sub->rcu, devx_free_subscription); + } + mutex_unlock(&dev->devx_event_table.event_xa_lock); + + put_device(&dev->ib_dev.dev); + return 0; +}; + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_UMEM_REG, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, + MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + enum ib_access_flags), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DEVX_UMEM_DEREG, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, + MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_QUERY_EQN, + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_QUERY_UAR, + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OTHER, + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_CREATE, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DEVX_OBJ_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_MODIFY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, + UVERBS_IDR_ANY_OBJECT, + UVERBS_ACCESS_WRITE, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_QUERY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, + UVERBS_IDR_ANY_OBJECT, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + 
MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, + UVERBS_IDR_ANY_OBJECT, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN, + u16, UA_MANDATORY), + UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD, + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT, + UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE, + MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST, + UVERBS_ATTR_MIN_SIZE(sizeof(u16)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE, + UVERBS_ATTR_TYPE(u64), + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL)); + +DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); + + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC, + UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE, + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UVERBS_ACCESS_NEW, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file), + devx_async_cmd_event_destroy_uobj, + &devx_async_cmd_event_fops, "[devx_async_cmd]", + FMODE_READ), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC, + UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE, + MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS, + enum mlx5_ib_uapi_devx_create_event_channel_flags, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD, + UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file), + devx_async_event_destroy_uobj, + &devx_async_event_fops, "[devx_async_event]", + FMODE_READ), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)); + +static bool devx_is_supported(struct ib_device *device) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + + return MLX5_CAP_GEN(dev->mdev, log_max_uctx); +} + +const struct uapi_definition mlx5_ib_devx_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + 
MLX5_IB_OBJECT_DEVX, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_OBJ, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_UMEM, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + {}, +}; diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2016-2020, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -1106,6 +1107,12 @@ return err; } +static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps) +{ + mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n", + caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n"); +} + static u16 calc_dynamic_bfregs(int uars_per_sys_page) { /* Large page with non 4k uar support might limit the dynamic size */ @@ -1194,55 +1201,70 @@ mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); } -static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn, + u16 uid) +{ + int err; + + if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + return 0; + + err = mlx5_alloc_transport_domain(dev->mdev, tdn, uid); + if (err) + return err; + + return 0; +} + +static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn, + u16 uid) +{ + if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + return; + + mlx5_dealloc_transport_domain(dev->mdev, tdn, uid); +} + +static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, + struct ib_udata *udata) { + struct ib_device *ibdev = uctx->device; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_alloc_ucontext_req_v2 req = {}; struct mlx5_ib_alloc_ucontext_resp resp = {}; - struct mlx5_ib_ucontext *context; + struct mlx5_ib_ucontext *context = to_mucontext(uctx); struct mlx5_bfreg_info *bfregi; int ver; int err; - size_t reqlen; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); bool lib_uar_4k; bool lib_uar_dyn; if (!dev->ib_active) - return ERR_PTR(-EAGAIN); + return -EAGAIN; - if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr)) - return ERR_PTR(-EINVAL); - - reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); - if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) + if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) ver = 0; - else if (reqlen >= min_req_v2) + else if (udata->inlen >= min_req_v2) ver = 2; else - return ERR_PTR(-EINVAL); + return -EINVAL; - err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req))); + err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); if (err) - return ERR_PTR(err); + return err; - if (req.flags) - return ERR_PTR(-EINVAL); + if (req.flags & 
~MLX5_IB_ALLOC_UCTX_DEVX) + return -EOPNOTSUPP; if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; req.total_num_bfregs = ALIGN(req.total_num_bfregs, MLX5_NON_FP_BFREGS_PER_UAR); if (req.num_low_latency_bfregs > req.total_num_bfregs - 1) - return ERR_PTR(-EINVAL); - - if (reqlen > sizeof(req) && - !ib_is_udata_cleared(udata, sizeof(req), - reqlen - sizeof(req))) - return ERR_PTR(-EOPNOTSUPP); + return -EINVAL; resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) @@ -1263,10 +1285,6 @@ resp.response_length = min(offsetof(typeof(resp), response_length) + sizeof(resp.response_length), udata->outlen); - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); - lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR; bfregi = &context->bfregi; @@ -1303,19 +1321,18 @@ goto out_sys_pages; uar_done: - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; -#endif - - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { - err = mlx5_alloc_transport_domain(dev->mdev, - &context->tdn); - if (err) + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { + err = mlx5_ib_devx_create(dev, true); + if (err < 0) goto out_uars; + context->devx_uid = err; } - INIT_LIST_HEAD(&context->vma_private_list); + err = mlx5_ib_alloc_transport_domain(dev, &context->tdn, + context->devx_uid); + if (err) + goto out_devx; + INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); @@ -1360,17 +1377,21 @@ err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) - goto out_td; + goto out_mdev; bfregi->ver = ver; bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; context->cqe_version = resp.cqe_version; + context->lib_caps = req.lib_caps; + print_lib_caps(dev, context->lib_caps); - return &context->ibucontext; + return 0; -out_td: - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_dealloc_transport_domain(dev->mdev, context->tdn); +out_mdev: + mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); +out_devx: + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) + mlx5_ib_devx_destroy(dev, context->devx_uid); out_uars: deallocate_uars(dev, context); @@ -1382,26 +1403,24 @@ kfree(bfregi->count); out_ctx: - kfree(context); - return ERR_PTR(err); + return err; } -static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_bfreg_info *bfregi; bfregi = &context->bfregi; - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_dealloc_transport_domain(dev->mdev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); + + if (context->devx_uid) + mlx5_ib_devx_destroy(dev, context->devx_uid); deallocate_uars(dev, context); kfree(bfregi->sys_pages); kfree(bfregi->count); - kfree(context); - - return 0; } static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, @@ -1435,131 +1454,9 @@ return get_arg(offset) | ((offset >> 16) & 0xff) << 8; } -static void mlx5_ib_vma_open(struct vm_area_struct *area) -{ - /* vma_open is called when a new VMA is created on top of our VMA. 
This - * is done through either mremap flow or split_vma (usually due to - * mlock, madvise, munmap, etc.) We do not support a clone of the VMA, - * as this VMA is strongly hardware related. Therefore we set the - * vm_ops of the newly created/cloned VMA to NULL, to prevent it from - * calling us again and trying to do incorrect actions. We assume that - * the original VMA size is exactly a single page, and therefore all - * "splitting" operation will not happen to it. - */ - area->vm_ops = NULL; -} - -static void mlx5_ib_vma_close(struct vm_area_struct *area) -{ - struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data; - - /* It's guaranteed that all VMAs opened on a FD are closed before the - * file itself is closed, therefore no sync is needed with the regular - * closing flow. (e.g. mlx5 ib_dealloc_ucontext) - * However need a sync with accessing the vma as part of - * mlx5_ib_disassociate_ucontext. - * The close operation is usually called under mm->mmap_sem except when - * process is exiting. - * The exiting case is handled explicitly as part of - * mlx5_ib_disassociate_ucontext. - */ - mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data; - - /* setting the vma context pointer to null in the mlx5_ib driver's - * private data, to protect a race condition in - * mlx5_ib_disassociate_ucontext(). - */ - mlx5_ib_vma_priv_data->vma = NULL; - list_del(&mlx5_ib_vma_priv_data->list); - kfree(mlx5_ib_vma_priv_data); -} - -static const struct vm_operations_struct mlx5_ib_vm_ops = { - .open = mlx5_ib_vma_open, - .close = mlx5_ib_vma_close -}; - -static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, - struct mlx5_ib_ucontext *ctx) -{ - struct mlx5_ib_vma_private_data *vma_prv; - struct list_head *vma_head = &ctx->vma_private_list; - - vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL); - if (!vma_prv) - return -ENOMEM; - - vma_prv->vma = vma; - vma->vm_private_data = vma_prv; - vma->vm_ops = &mlx5_ib_vm_ops; - - list_add(&vma_prv->list, vma_head); - - return 0; -} static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) { - int ret; - struct vm_area_struct *vma; - struct mlx5_ib_vma_private_data *vma_private, *n; - struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); - struct task_struct *owning_process = NULL; - struct mm_struct *owning_mm = NULL; - - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); - if (!owning_process) - return; - - owning_mm = get_task_mm(owning_process); - if (!owning_mm) { - pr_info("no mm, disassociate ucontext is pending task termination\n"); - while (1) { - put_task_struct(owning_process); - usleep_range(1000, 2000); - owning_process = get_pid_task(ibcontext->tgid, - PIDTYPE_PID); - if (!owning_process || owning_process->task_thread-> - td_proc->p_state == PRS_ZOMBIE) { - pr_info("disassociate ucontext done, task was terminated\n"); - /* in case task was dead need to release the - * task struct. - */ - if (owning_process) - put_task_struct(owning_process); - return; - } - } - } - - /* need to protect from a race on closing the vma as part of - * mlx5_ib_vma_close. 
- */ - down_write(&owning_mm->mmap_sem); - list_for_each_entry_safe(vma_private, n, &context->vma_private_list, - list) { - vma = vma_private->vma; - ret = zap_vma_ptes(vma, vma->vm_start, - PAGE_SIZE); - if (ret == -ENOTSUP) { - if (bootverbose) - WARN_ONCE( - "%s: zap_vma_ptes not implemented for unmanaged mappings", __func__); - } else { - WARN(ret, "%s: zap_vma_ptes failed, error %d", - __func__, -ret); - } - /* context going to be destroyed, should - * not access ops any more. - */ - /* XXXKIB vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); */ - vma->vm_ops = NULL; - list_del(&vma_private->list); - kfree(vma_private); - } - up_write(&owning_mm->mmap_sem); - mmput(owning_mm); - put_task_struct(owning_process); } static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) @@ -1576,6 +1473,39 @@ } } +static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, + struct vm_area_struct *vma, + struct mlx5_ib_ucontext *context) +{ + if ((vma->vm_end - vma->vm_start != PAGE_SIZE) || + !(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1) + return -EOPNOTSUPP; + + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) + return -EPERM; + + return -EOPNOTSUPP; +} + +static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) +{ + struct mlx5_user_mmap_entry *mentry = to_mmmap(entry); + struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device); + + switch (mentry->mmap_flag) { + case MLX5_IB_MMAP_TYPE_UAR_WC: + case MLX5_IB_MMAP_TYPE_UAR_NC: + mlx5_cmd_free_uar(dev->mdev, mentry->page_idx); + kfree(mentry); + break; + default: + WARN_ON(true); + } +} + static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, struct vm_area_struct *vma, struct mlx5_ib_ucontext *context) @@ -1587,7 +1517,7 @@ pgprot_t prot; u32 bfreg_dyn_idx = 0; u32 uar_index; - int dyn_uar = (cmd == MLX5_IB_MMAP_WC_PAGE); + int dyn_uar = (cmd == MLX5_IB_MMAP_ALLOC_WC); int max_valid_idx = dyn_uar ? 
bfregi->num_sys_pages : bfregi->num_static_sys_pages; @@ -1610,6 +1540,7 @@ switch (cmd) { case MLX5_IB_MMAP_WC_PAGE: + case MLX5_IB_MMAP_ALLOC_WC: case MLX5_IB_MMAP_REGULAR_PAGE: /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */ prot = pgprot_writecombine(vma->vm_page_prot); @@ -1657,18 +1588,18 @@ pfn = uar_index2pfn(dev, uar_index); mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); - vma->vm_page_prot = prot; - err = io_remap_pfn_range(vma, vma->vm_start, pfn, - PAGE_SIZE, vma->vm_page_prot); + err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, + prot, NULL); if (err) { - mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%llx, pfn=%pa, mmap_cmd=%s\n", - err, (unsigned long long)vma->vm_start, &pfn, mmap_cmd2str(cmd)); + mlx5_ib_err(dev, + "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n", + err, mmap_cmd2str(cmd)); goto err; } if (dyn_uar) bfregi->sys_pages[idx] = uar_index; - return mlx5_ib_set_vma_data(vma, context); + return 0; err: if (!dyn_uar) @@ -1682,6 +1613,48 @@ return err; } +static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma) +{ + unsigned long idx; + u8 command; + + command = get_command(vma->vm_pgoff); + idx = get_extended_index(vma->vm_pgoff); + + return (command << 16 | idx); +} + +static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev, + struct vm_area_struct *vma, + struct ib_ucontext *ucontext) +{ + struct mlx5_user_mmap_entry *mentry; + struct rdma_user_mmap_entry *entry; + unsigned long pgoff; + pgprot_t prot; + phys_addr_t pfn; + int ret; + + pgoff = mlx5_vma_to_pgoff(vma); + entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff); + if (!entry) + return -EINVAL; + + mentry = to_mmmap(entry); + pfn = (mentry->address >> PAGE_SHIFT); + if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR || + mentry->mmap_flag == MLX5_IB_MMAP_TYPE_UAR_NC) + prot = pgprot_noncached(vma->vm_page_prot); + else + prot = pgprot_writecombine(vma->vm_page_prot); + ret = rdma_user_mmap_io(ucontext, vma, pfn, + entry->npages * PAGE_SIZE, + prot, + entry); + rdma_user_mmap_entry_put(&mentry->rdma_entry); + return ret; +} + static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); @@ -1692,6 +1665,10 @@ command = get_command(vma->vm_pgoff); switch (command) { case MLX5_IB_MMAP_WC_PAGE: + case MLX5_IB_MMAP_ALLOC_WC: + if (!dev->wc_support) + return -EPERM; + /* FALLTHROUGH */ case MLX5_IB_MMAP_NC_PAGE: case MLX5_IB_MMAP_REGULAR_PAGE: return uar_mmap(dev, command, vma, context); @@ -1710,65 +1687,55 @@ if (PAGE_SIZE > 4096) return -EOPNOTSUPP; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); pfn = (dev->mdev->iseg_base + offsetof(struct mlx5_init_seg, internal_timer_h)) >> PAGE_SHIFT; - if (io_remap_pfn_range(vma, vma->vm_start, pfn, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - - mlx5_ib_dbg(dev, "mapped internal timer at 0x%llx, PA 0x%llx\n", - (unsigned long long)vma->vm_start, - (unsigned long long)pfn << PAGE_SHIFT); - break; + return rdma_user_mmap_io(&context->ibucontext, vma, pfn, + PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot), + NULL); + case MLX5_IB_MMAP_CLOCK_INFO: + return mlx5_ib_mmap_clock_info_page(dev, vma, context); default: - return -EINVAL; + return mlx5_ib_mmap_offset(dev, vma, ibcontext); } return 0; } -static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { + 
struct mlx5_ib_pd *pd = to_mpd(ibpd); + struct ib_device *ibdev = ibpd->device; struct mlx5_ib_alloc_pd_resp resp; - struct mlx5_ib_pd *pd; int err; + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + u16 uid = context ? context->devx_uid : 0; - pd = kmalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - - err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn); - if (err) { - kfree(pd); - return ERR_PTR(err); - } + err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn, uid); + if (err) + return (err); - if (context) { + pd->uid = uid; + if (udata) { resp.pdn = pd->pdn; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); - kfree(pd); - return ERR_PTR(-EFAULT); + mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid); + return -EFAULT; } } - return &pd->ibpd; + return 0; } -static int mlx5_ib_dealloc_pd(struct ib_pd *pd) +static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); - mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); - kfree(mpd); - - return 0; + mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); } enum { @@ -2358,7 +2325,8 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, - int domain) + int domain, + struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); @@ -2372,6 +2340,7 @@ return ERR_PTR(-ENOSPC); if (domain != IB_FLOW_DOMAIN_USER || + udata != NULL || flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) return ERR_PTR(-EINVAL); @@ -2734,9 +2703,12 @@ if (err) mlx5_ib_warn(dev, "mr cache cleanup failed\n"); - mlx5_ib_destroy_qp(dev->umrc.qp); - ib_free_cq(dev->umrc.cq); - ib_dealloc_pd(dev->umrc.pd); + if (dev->umrc.qp) + mlx5_ib_destroy_qp(dev->umrc.qp, NULL); + if (dev->umrc.cq) + ib_free_cq(dev->umrc.cq); + if (dev->umrc.pd) + ib_dealloc_pd(dev->umrc.pd); } enum { @@ -2835,13 +2807,16 @@ return 0; error_4: - mlx5_ib_destroy_qp(qp); + mlx5_ib_destroy_qp(qp, NULL); + dev->umrc.qp = NULL; error_3: ib_free_cq(cq); + dev->umrc.cq = NULL; error_2: ib_dealloc_pd(pd); + dev->umrc.pd = NULL; error_0: kfree(attr); @@ -2853,36 +2828,42 @@ { struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; + struct ib_device *ibdev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; int port; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); + ibdev = &dev->ib_dev; mutex_init(&devr->mutex); - devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); - if (IS_ERR(devr->p0)) { - ret = PTR_ERR(devr->p0); - goto error0; - } - devr->p0->device = &dev->ib_dev; + devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd); + if (!devr->p0) + return -ENOMEM; + + devr->p0->device = ibdev; devr->p0->uobject = NULL; atomic_set(&devr->p0->usecnt, 0); - devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); - if (IS_ERR(devr->c0)) { - ret = PTR_ERR(devr->c0); + ret = mlx5_ib_alloc_pd(devr->p0, NULL); + if (ret) + goto error0; + + devr->c0 = rdma_zalloc_drv_obj(ibdev, ib_cq); + if (!devr->c0) { + ret = -ENOMEM; goto error1; } - devr->c0->device = &dev->ib_dev; - devr->c0->uobject = NULL; - devr->c0->comp_handler = NULL; - devr->c0->event_handler = NULL; - devr->c0->cq_context = NULL; + + devr->c0->device = &dev->ib_dev; atomic_set(&devr->c0->usecnt, 0); - devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + ret 
= mlx5_ib_create_cq(devr->c0, &cq_attr, NULL); + if (ret) + goto err_create_cq; + + devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); if (IS_ERR(devr->x0)) { ret = PTR_ERR(devr->x0); goto error2; @@ -2893,7 +2874,7 @@ mutex_init(&devr->x0->tgt_qp_mutex); INIT_LIST_HEAD(&devr->x0->tgt_qp_list); - devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); if (IS_ERR(devr->x1)) { ret = PTR_ERR(devr->x1); goto error3; @@ -2908,24 +2889,26 @@ attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_XRC; - attr.ext.xrc.cq = devr->c0; + attr.ext.cq = devr->c0; attr.ext.xrc.xrcd = devr->x0; - devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); - if (IS_ERR(devr->s0)) { - ret = PTR_ERR(devr->s0); + devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq); + if (!devr->s0) { + ret = -ENOMEM; goto error4; } + devr->s0->device = &dev->ib_dev; devr->s0->pd = devr->p0; - devr->s0->uobject = NULL; - devr->s0->event_handler = NULL; - devr->s0->srq_context = NULL; devr->s0->srq_type = IB_SRQT_XRC; devr->s0->ext.xrc.xrcd = devr->x0; - devr->s0->ext.xrc.cq = devr->c0; + devr->s0->ext.cq = devr->c0; + ret = mlx5_ib_create_srq(devr->s0, &attr, NULL); + if (ret) + goto err_create; + atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); - atomic_inc(&devr->s0->ext.xrc.cq->usecnt); + atomic_inc(&devr->s0->ext.cq->usecnt); atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); @@ -2933,20 +2916,23 @@ attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_BASIC; - devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL); - if (IS_ERR(devr->s1)) { - ret = PTR_ERR(devr->s1); + devr->s1 = rdma_zalloc_drv_obj(ibdev, ib_srq); + if (!devr->s1) { + ret = -ENOMEM; goto error5; } + devr->s1->device = &dev->ib_dev; devr->s1->pd = devr->p0; - devr->s1->uobject = NULL; - devr->s1->event_handler = NULL; - devr->s1->srq_context = NULL; devr->s1->srq_type = IB_SRQT_BASIC; - devr->s1->ext.xrc.cq = devr->c0; + devr->s1->ext.cq = devr->c0; + + ret = mlx5_ib_create_srq(devr->s1, &attr, NULL); + if (ret) + goto error6; + atomic_inc(&devr->p0->usecnt); - atomic_set(&devr->s0->usecnt, 0); + atomic_set(&devr->s1->usecnt, 0); for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { INIT_WORK(&devr->ports[port].pkey_change_work, @@ -2956,35 +2942,44 @@ return 0; +error6: + kfree(devr->s1); error5: - mlx5_ib_destroy_srq(devr->s0); + mlx5_ib_destroy_srq(devr->s0, NULL); +err_create: + kfree(devr->s0); error4: - mlx5_ib_dealloc_xrcd(devr->x1); + mlx5_ib_dealloc_xrcd(devr->x1, NULL); error3: - mlx5_ib_dealloc_xrcd(devr->x0); + mlx5_ib_dealloc_xrcd(devr->x0, NULL); error2: - mlx5_ib_destroy_cq(devr->c0); + mlx5_ib_destroy_cq(devr->c0, NULL); +err_create_cq: + kfree(devr->c0); error1: - mlx5_ib_dealloc_pd(devr->p0); + mlx5_ib_dealloc_pd(devr->p0, NULL); error0: + kfree(devr->p0); return ret; } static void destroy_dev_resources(struct mlx5_ib_resources *devr) { - struct mlx5_ib_dev *dev = - container_of(devr, struct mlx5_ib_dev, devr); int port; - mlx5_ib_destroy_srq(devr->s1); - mlx5_ib_destroy_srq(devr->s0); - mlx5_ib_dealloc_xrcd(devr->x0); - mlx5_ib_dealloc_xrcd(devr->x1); - mlx5_ib_destroy_cq(devr->c0); - mlx5_ib_dealloc_pd(devr->p0); + mlx5_ib_destroy_srq(devr->s1, NULL); + kfree(devr->s1); + mlx5_ib_destroy_srq(devr->s0, NULL); + kfree(devr->s0); + mlx5_ib_dealloc_xrcd(devr->x0, NULL); + mlx5_ib_dealloc_xrcd(devr->x1, NULL); + mlx5_ib_destroy_cq(devr->c0, NULL); + kfree(devr->c0); + mlx5_ib_dealloc_pd(devr->p0, NULL); + kfree(devr->p0); /* Make sure no change P_Key work 
items are still executing */ - for (port = 0; port < dev->num_ports; ++port) + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) cancel_work_sync(&devr->ports[port].pkey_change_work); } @@ -3296,6 +3291,7 @@ MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock); + INIT_IB_DEVICE_OPS(&dev->ib_dev.ops, mlx5, MLX5); snprintf(dev->ib_dev.name, IB_DEVICE_NAME_MAX, "mlx5_%d", device_get_unit(mdev->pdev->dev.bsddev)); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; @@ -3353,6 +3349,7 @@ dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext; dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext; dev->ib_dev.mmap = mlx5_ib_mmap; + dev->ib_dev.mmap_free = mlx5_ib_mmap_free; dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd; dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd; dev->ib_dev.create_ah = mlx5_ib_create_ah; diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2013-2021, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -1303,7 +1303,7 @@ return 0; } -int mlx5_ib_dereg_mr(struct ib_mr *ibmr) +int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibmr->device); struct mlx5_ib_mr *mr = to_mmr(ibmr); @@ -1344,7 +1344,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg) + u32 max_num_sg, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); @@ -1389,7 +1389,7 @@ goto err_free_in; mr->desc_size = sizeof(struct mlx5_klm); mr->max_descs = ndescs; - } else if (mr_type == IB_MR_TYPE_SIGNATURE) { + } else if (mr_type == IB_MR_TYPE_INTEGRITY) { u32 psv_index[2]; MLX5_SET(mkc, mkc, bsf_en, 1); diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "mlx5_ib.h" /* not supported currently */ @@ -679,11 +680,15 @@ return err; } -static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq) +static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq, + struct ib_udata *udata) { - struct mlx5_ib_ucontext *context; + struct mlx5_ib_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext); - context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &rwq->db); if (rwq->umem) ib_umem_release(rwq->umem); @@ -770,6 +775,7 @@ __be64 *pas; void *qpc; int err; + u16 uid; u32 uar_flags; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); @@ -837,6 +843,9 @@ goto err_umem; } + uid = (attr->qp_type != IB_QPT_XRC_TGT && + attr->qp_type != IB_QPT_XRC_INI) ? 
to_mpd(pd)->uid : 0; + MLX5_SET(create_qp_in, *in, uid, uid); pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); if (ubuffer->umem) mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); @@ -885,11 +894,15 @@ } static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp, - struct mlx5_ib_qp_base *base) + struct mlx5_ib_qp_base *base, + struct ib_udata *udata) { - struct mlx5_ib_ucontext *context; + struct mlx5_ib_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext); - context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &qp->db); if (base->ubuffer.umem) ib_umem_release(base->ubuffer.umem); @@ -1045,19 +1058,21 @@ } static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq, u32 tdn) + struct mlx5_ib_sq *sq, u32 tdn, + struct ib_pd *pd) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid); MLX5_SET(tisc, tisc, transport_domain, tdn); return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn); } static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq) + struct mlx5_ib_sq *sq, struct ib_pd *pd) { - mlx5_core_destroy_tis(dev->mdev, sq->tisn); + mlx5_core_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid); } static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, @@ -1093,6 +1108,7 @@ goto err_umem; } + MLX5_SET(create_sq_in, in, uid, to_mpd(pd)->uid); sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); @@ -1154,7 +1170,8 @@ } static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, - struct mlx5_ib_rq *rq, void *qpin) + struct mlx5_ib_rq *rq, void *qpin, + struct ib_pd *pd) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; @@ -1175,6 +1192,7 @@ if (!in) return -ENOMEM; + MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid); rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); MLX5_SET(rqc, rqc, vlan_strip_disable, 1); MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE); @@ -1216,7 +1234,8 @@ } static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, - struct mlx5_ib_rq *rq, u32 tdn) + struct mlx5_ib_rq *rq, u32 tdn, + struct ib_pd *pd) { u32 *in; void *tirc; @@ -1228,6 +1247,7 @@ if (!in) return -ENOMEM; + MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid); tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn); @@ -1241,9 +1261,10 @@ } static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, - struct mlx5_ib_rq *rq) + struct mlx5_ib_rq *rq, + struct ib_pd *pd) { - mlx5_core_destroy_tir(dev->mdev, rq->tirn); + mlx5_core_destroy_tir(dev->mdev, rq->tirn, to_mpd(pd)->uid); } static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, @@ -1260,7 +1281,7 @@ u32 tdn = mucontext->tdn; if (qp->sq.wqe_cnt) { - err = create_raw_packet_qp_tis(dev, sq, tdn); + err = create_raw_packet_qp_tis(dev, sq, tdn, pd); if (err) return err; @@ -1274,12 +1295,12 @@ if (qp->rq.wqe_cnt) { rq->base.container_mibqp = qp; - err = create_raw_packet_qp_rq(dev, rq, in); + err = create_raw_packet_qp_rq(dev, rq, in, pd); if (err) goto err_destroy_sq; - err = create_raw_packet_qp_tir(dev, rq, tdn); + err = create_raw_packet_qp_tir(dev, rq, tdn, pd); if (err) goto err_destroy_rq; } @@ -1296,7 +1317,7 @@ return err; 
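
A note on the pattern running through the hunks above: every firmware command that creates a queue object on behalf of a user context (create_qp, create_tis, create_sq, create_rq, create_tir) now stamps the command mailbox with the owning PD's uid, and the matching destroy commands pass the same uid back, so firmware can attribute and reclaim objects per user context. As a rough, self-contained model of what such a setter does — the real driver uses the MLX5_SET() macro generated from the mlx5_ifc.h bit layouts; cmd_set_uid() below is an invented name and a little-endian host is assumed — the uid[0x10] field sits in the low 16 bits of the first big-endian dword of the mailbox:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for MLX5_SET(<cmd>_in, in, uid, uid). */
static void cmd_set_uid(uint32_t *mailbox, uint16_t uid)
{
	uint32_t dw = __builtin_bswap32(mailbox[0]);	/* be32 -> host */

	dw = (dw & 0xffff0000u) | uid;		/* opcode[0x10] | uid[0x10] */
	mailbox[0] = __builtin_bswap32(dw);	/* host -> be32 */
}

int main(void)
{
	uint32_t in[2] = { 0 };

	cmd_set_uid(in, 0x1234);
	printf("dword0 (big endian) = 0x%08x\n", in[0]);
	return 0;
}
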
destroy_raw_packet_qp_sq(dev, sq); err_destroy_tis: - destroy_raw_packet_qp_tis(dev, sq); + destroy_raw_packet_qp_tis(dev, sq, pd); return err; } @@ -1309,13 +1330,13 @@ struct mlx5_ib_rq *rq = &raw_packet_qp->rq; if (qp->rq.wqe_cnt) { - destroy_raw_packet_qp_tir(dev, rq); + destroy_raw_packet_qp_tir(dev, rq, qp->ibqp.pd); destroy_raw_packet_qp_rq(dev, rq); } if (qp->sq.wqe_cnt) { destroy_raw_packet_qp_sq(dev, sq); - destroy_raw_packet_qp_tis(dev, sq); + destroy_raw_packet_qp_tis(dev, sq, qp->ibqp.pd); } } @@ -1333,7 +1354,8 @@ static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { - mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn); + mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn, + to_mpd(qp->ibqp.pd)->uid); } static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, @@ -1405,6 +1427,7 @@ if (!in) return -ENOMEM; + MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid); tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); @@ -1854,7 +1877,7 @@ err_create: if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(dev, pd, qp, base); + destroy_qp_user(dev, pd, qp, base, udata); else if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); @@ -1964,7 +1987,8 @@ const struct mlx5_modify_raw_qp_param *raw_qp_param, u8 lag_tx_affinity); -static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_udata *udata) { struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_ib_qp_base *base = &qp->trans_qp.base; @@ -2033,7 +2057,7 @@ if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); else if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base); + destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata); } static const char *ib_qp_type_str(enum ib_qp_type type) @@ -2163,7 +2187,7 @@ return &qp->ibqp; } -int mlx5_ib_destroy_qp(struct ib_qp *qp) +int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); @@ -2171,7 +2195,7 @@ if (unlikely(qp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_destroy_qp(qp); - destroy_qp_common(dev, mqp); + destroy_qp_common(dev, mqp, udata); kfree(mqp); @@ -2244,7 +2268,8 @@ } static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, - struct mlx5_ib_sq *sq, u8 sl) + struct mlx5_ib_sq *sq, u8 sl, + struct ib_pd *pd) { void *in; void *tisc; @@ -2257,6 +2282,7 @@ return -ENOMEM; MLX5_SET(modify_tis_in, in, bitmask.prio, 1); + MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid); tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1)); @@ -2269,7 +2295,8 @@ } static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev, - struct mlx5_ib_sq *sq, u8 tx_affinity) + struct mlx5_ib_sq *sq, u8 tx_affinity, + struct ib_pd *pd) { void *in; void *tisc; @@ -2282,6 +2309,7 @@ return -ENOMEM; MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1); + MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid); tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity); @@ -2362,7 +2390,7 @@ if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) return modify_raw_packet_eth_prio(dev->mdev, &qp->raw_packet_qp.sq, - ah->sl & 0xf); + ah->sl & 0xf, qp->ibqp.pd); return 0; } @@ -2510,9 +2538,9 @@ return result; } -static int 
modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev, - struct mlx5_ib_rq *rq, int new_state, - const struct mlx5_modify_raw_qp_param *raw_qp_param) +static int modify_raw_packet_qp_rq( + struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state, + const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd) { void *in; void *rqc; @@ -2526,6 +2554,7 @@ MLX5_SET(modify_rq_in, in, rqn, rq->base.mqp.qpn); MLX5_SET(modify_rq_in, in, rq_state, rq->state); + MLX5_SET(modify_rq_in, in, uid, to_mpd(pd)->uid); rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(rqc, rqc, state, new_state); @@ -2552,7 +2581,8 @@ } static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, - struct mlx5_ib_sq *sq, int new_state) + struct mlx5_ib_sq *sq, int new_state, + struct ib_pd *pd) { void *in; void *sqc; @@ -2565,6 +2595,7 @@ return -ENOMEM; MLX5_SET(modify_sq_in, in, sqn, sq->base.mqp.qpn); + MLX5_SET(modify_sq_in, in, uid, to_mpd(pd)->uid); MLX5_SET(modify_sq_in, in, sq_state, sq->state); sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); @@ -2588,6 +2619,8 @@ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + int modify_rq = !!qp->rq.wqe_cnt; + int modify_sq = !!qp->sq.wqe_cnt; int rq_state; int sq_state; int err; @@ -2605,10 +2638,11 @@ rq_state = MLX5_RQC_STATE_RST; sq_state = MLX5_SQC_STATE_RST; break; - case MLX5_CMD_OP_INIT2INIT_QP: - case MLX5_CMD_OP_INIT2RTR_QP: case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: + return raw_qp_param->set_mask ? -EINVAL : 0; + case MLX5_CMD_OP_INIT2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: if (raw_qp_param->set_mask) return -EINVAL; else @@ -2618,21 +2652,23 @@ return -EINVAL; } - if (qp->rq.wqe_cnt) { - err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); + if (modify_rq) { + err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param, + qp->ibqp.pd); if (err) return err; } - if (qp->sq.wqe_cnt) { + if (modify_sq) { if (tx_affinity) { err = modify_raw_packet_tx_affinity(dev->mdev, sq, - tx_affinity); + tx_affinity, + qp->ibqp.pd); if (err) return err; } - return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state); + return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, qp->ibqp.pd); } return 0; @@ -3815,10 +3851,10 @@ return 0; } -static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, - struct mlx5_wqe_ctrl_seg **ctrl, - const struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq, bool send_signaled, bool solicited) +static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + const struct ib_send_wr *wr, unsigned *idx, + int *size, int nreq, int send_flags) { if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) return -ENOMEM; @@ -3829,8 +3865,10 @@ *(uint32_t *)(*seg + 8) = 0; (*ctrl)->imm = send_ieth(wr); (*ctrl)->fm_ce_se = qp->sq_signal_bits | - (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | - (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); + (send_flags & IB_SEND_SIGNALED ? + MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (send_flags & IB_SEND_SOLICITED ? 
+ MLX5_WQE_CTRL_SOLICITED : 0); *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; @@ -3838,16 +3876,6 @@ return 0; } -static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, - struct mlx5_wqe_ctrl_seg **ctrl, - const struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq) -{ - return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq, - wr->send_flags & IB_SEND_SIGNALED, - wr->send_flags & IB_SEND_SOLICITED); -} - static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, u8 size, unsigned idx, u64 wr_id, @@ -3929,7 +3957,7 @@ goto out; } - err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq, wr->send_flags); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4001,8 +4029,8 @@ * SET_PSV WQEs are not signaled and solicited * on error */ - err = __begin_wqe(qp, &seg, &ctrl, wr, - &idx, &size, nreq, false, true); + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq, IB_SEND_SOLICITED); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4022,8 +4050,8 @@ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, get_fence(fence, wr), next_fence, MLX5_OPCODE_SET_PSV); - err = __begin_wqe(qp, &seg, &ctrl, wr, - &idx, &size, nreq, false, true); + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq, wr->send_flags); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4610,8 +4638,7 @@ } struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) + struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_xrcd *xrcd; @@ -4633,20 +4660,17 @@ return &xrcd->ibxrcd; } -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; int err; err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn); - if (err) { + if (err) mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); - return err; - } kfree(xrcd); - return 0; } @@ -4690,6 +4714,7 @@ if (!in) return -ENOMEM; + MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid); rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE); @@ -4842,22 +4867,20 @@ err_copy: mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); err_user_rq: - destroy_user_rq(pd, rwq); + destroy_user_rq(pd, rwq, udata); err: kfree(rwq); return ERR_PTR(err); } -int mlx5_ib_destroy_wq(struct ib_wq *wq) +void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); - destroy_user_rq(wq->pd, rwq); + destroy_user_rq(wq->pd, rwq, udata); kfree(rwq); - - return 0; } struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, @@ -4911,6 +4934,9 @@ for (i = 0; i < sz; i++) MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num); + rwq_ind_tbl->uid = to_mpd(init_attr->ind_tbl[0]->pd)->uid; + MLX5_SET(create_rqt_in, in, uid, rwq_ind_tbl->uid); + err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn); kvfree(in); @@ -4929,7 +4955,7 @@ return &rwq_ind_tbl->ib_rwq_ind_tbl; err_copy: - mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); + mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); err: kfree(rwq_ind_tbl); return ERR_PTR(err); @@ -4940,7 +4966,7 @@ struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl); 
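
The begin_wqe()/__begin_wqe() merge above drops the two boolean parameters in favor of the caller's ib_send_flags word, so the SET_PSV path can simply pass IB_SEND_SOLICITED instead of (false, true). A minimal sketch of the resulting flag translation into the control segment's fm_ce_se byte — the flag values are assumptions matching the usual IB core and mlx5 definitions, and wqe_ctrl_flags() is an invented helper for illustration only:

#include <stdint.h>
#include <stdio.h>

/* Assumed values; the driver takes these from the IB core and mlx5 headers. */
#define IB_SEND_SIGNALED	(1 << 1)
#define IB_SEND_SOLICITED	(1 << 2)
#define MLX5_WQE_CTRL_CQ_UPDATE	(2 << 2)
#define MLX5_WQE_CTRL_SOLICITED	(1 << 1)

/* Mirrors the fm_ce_se computation in the unified begin_wqe() above. */
static uint8_t wqe_ctrl_flags(int send_flags)
{
	return (send_flags & IB_SEND_SIGNALED ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
	       (send_flags & IB_SEND_SOLICITED ? MLX5_WQE_CTRL_SOLICITED : 0);
}

int main(void)
{
	printf("signaled|solicited -> fm_ce_se 0x%02x\n",
	    wqe_ctrl_flags(IB_SEND_SIGNALED | IB_SEND_SOLICITED));
	return 0;
}
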
struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device); - mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); + mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); kfree(rwq_ind_tbl); return 0; @@ -4992,6 +5018,7 @@ if (wq_state == IB_WQS_ERR) wq_state = MLX5_RQC_STATE_ERR; MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); + MLX5_SET(modify_rq_in, in, uid, to_mpd(wq->pd)->uid); MLX5_SET(rqc, rqc, state, wq_state); err = mlx5_core_modify_rq(dev->mdev, in, inlen); diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_srq.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_srq.c --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_srq.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_srq.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,12 +31,10 @@ #include #include #include +#include #include "mlx5_ib.h" -/* not supported currently */ -static int srq_signature; - static void *get_wqe(struct mlx5_ib_srq *srq, int n) { return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift); @@ -73,6 +71,8 @@ { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_srq ucmd = {}; + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); size_t ucmdlen; int err; int npages; @@ -96,17 +96,15 @@ udata->inlen - sizeof(ucmd))) return -EINVAL; - if (in->type == IB_SRQT_XRC) { - err = get_srq_user_index(to_mucontext(pd->uobject->context), - &ucmd, udata->inlen, &uidx); + if (in->type != IB_SRQT_BASIC) { + err = get_srq_user_index(ucontext, &ucmd, udata->inlen, &uidx); if (err) return err; } srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); - srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, - 0, 0); + srq->umem = ib_umem_get(&ucontext->ibucontext, ucmd.buf_addr, buf_size, 0, 0); if (IS_ERR(srq->umem)) { mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size); err = PTR_ERR(srq->umem); @@ -130,8 +128,7 @@ mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); - err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), - ucmd.db_addr, &srq->db); + err = mlx5_ib_db_map_user(ucontext, ucmd.db_addr, &srq->db); if (err) { mlx5_ib_dbg(dev, "map doorbell failed\n"); goto err_in; @@ -139,9 +136,11 @@ in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; in->page_offset = offset; + in->uid = (in->type != IB_SRQT_XRC) ? 
to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && - in->type == IB_SRQT_XRC) + in->type != IB_SRQT_BASIC) in->user_index = uidx; + return 0; err_in: @@ -192,16 +191,14 @@ srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); if (!srq->wrid) { - mlx5_ib_dbg(dev, "kmalloc failed %lu\n", - (unsigned long)(srq->msrq.max * sizeof(u64))); err = -ENOMEM; goto err_in; } - srq->wq_sig = !!srq_signature; + srq->wq_sig = 0; in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && - in->type == IB_SRQT_XRC) + in->type != IB_SRQT_BASIC) in->user_index = MLX5_IB_DEFAULT_UIDX; return 0; @@ -217,9 +214,15 @@ return err; } -static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) +static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, + struct ib_udata *udata) { - mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); + mlx5_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext), + &srq->db); ib_umem_release(srq->umem); } @@ -231,16 +234,16 @@ mlx5_db_free(dev->mdev, &srq->db); } -struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int mlx5_ib_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_ib_srq *srq; + struct mlx5_ib_dev *dev = to_mdev(ib_srq->device); + struct mlx5_ib_srq *srq = to_msrq(ib_srq); size_t desc_size; size_t buf_size; int err; - struct mlx5_srq_attr in = {0}; + struct mlx5_srq_attr in = {}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); /* Sanity check SRQ size before proceeding */ @@ -248,13 +251,9 @@ mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", init_attr->attr.max_wr, max_srq_wqes); - return ERR_PTR(-EINVAL); + return -EINVAL; } - srq = kmalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); - mutex_init(&srq->mutex); spin_lock_init(&srq->lock); srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1); @@ -262,50 +261,50 @@ desc_size = sizeof(struct mlx5_wqe_srq_next_seg) + srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg); - if (desc_size == 0 || srq->msrq.max_gs > desc_size) { - err = -EINVAL; - goto err_srq; - } + if (desc_size == 0 || srq->msrq.max_gs > desc_size) + return -EINVAL; + desc_size = roundup_pow_of_two(desc_size); desc_size = max_t(size_t, 32, desc_size); - if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) { - err = -EINVAL; - goto err_srq; - } + if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) + return -EINVAL; + srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) / sizeof(struct mlx5_wqe_data_seg); srq->msrq.wqe_shift = ilog2(desc_size); buf_size = srq->msrq.max * desc_size; - if (buf_size < desc_size) { - err = -EINVAL; - goto err_srq; - } + if (buf_size < desc_size) + return -EINVAL; + in.type = init_attr->srq_type; - if (pd->uobject) - err = create_srq_user(pd, srq, &in, udata, buf_size); + if (udata) + err = create_srq_user(ib_srq->pd, srq, &in, udata, buf_size); else err = create_srq_kernel(dev, srq, &in, buf_size); - if (err || !in.pas) { + if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", - pd->uobject ? "user" : "kernel", err); - goto err_srq; + udata ? 
"user" : "kernel", err); + return err; } in.log_size = ilog2(srq->msrq.max); in.wqe_shift = srq->msrq.wqe_shift - 4; if (srq->wq_sig) in.flags |= MLX5_SRQ_FLAG_WQ_SIG; - if (init_attr->srq_type == IB_SRQT_XRC) { + + if (init_attr->srq_type == IB_SRQT_XRC) in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; - in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn; - } else if (init_attr->srq_type == IB_SRQT_BASIC) { + else in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn; + + if (ib_srq_has_cq(init_attr->srq_type)) + in.cqn = to_mcq(init_attr->ext.cq)->mcq.cqn; + else in.cqn = to_mcq(dev->devr.c0)->mcq.cqn; - } - in.pd = to_mpd(pd)->pdn; + in.pd = to_mpd(ib_srq->pd)->pdn; in.db_record = srq->db.dma; err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in); kvfree(in.pas); @@ -319,7 +318,7 @@ srq->msrq.event = mlx5_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (pd->uobject) + if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) { mlx5_ib_dbg(dev, "copy to user failed\n"); err = -EFAULT; @@ -328,21 +327,18 @@ init_attr->attr.max_wr = srq->msrq.max - 1; - return &srq->ibsrq; + return 0; err_core: mlx5_core_destroy_srq(dev->mdev, &srq->msrq); err_usr_kern_srq: - if (pd->uobject) - destroy_srq_user(pd, srq); + if (udata) + destroy_srq_user(ib_srq->pd, srq, udata); else destroy_srq_kernel(dev, srq); -err_srq: - kfree(srq); - - return ERR_PTR(err); + return err; } int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -395,7 +391,7 @@ return ret; } -int mlx5_ib_destroy_srq(struct ib_srq *srq) +void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); @@ -403,14 +399,16 @@ mlx5_core_destroy_srq(dev->mdev, &msrq->msrq); if (srq->uobject) { - mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); + mlx5_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext), + &msrq->db); ib_umem_release(msrq->umem); } else { destroy_srq_kernel(dev, msrq); } - - kfree(srq); - return 0; } void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) diff --git a/sys/dev/mlx5/mlx5_ifc.h b/sys/dev/mlx5/mlx5_ifc.h --- a/sys/dev/mlx5/mlx5_ifc.h +++ b/sys/dev/mlx5/mlx5_ifc.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2013-2020, Mellanox Technologies. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,6 +31,7 @@ #include enum { + MLX5_EVENT_TYPE_NOTIFY_ANY = 0x0, MLX5_EVENT_TYPE_COMP = 0x0, MLX5_EVENT_TYPE_PATH_MIG = 0x1, MLX5_EVENT_TYPE_COMM_EST = 0x2, @@ -51,6 +52,7 @@ MLX5_EVENT_TYPE_GPIO_EVENT = 0x15, MLX5_EVENT_TYPE_CODING_PORT_MODULE_EVENT = 0x16, MLX5_EVENT_TYPE_TEMP_WARN_EVENT = 0x17, + MLX5_EVENT_TYPE_XRQ_ERROR = 0x18, MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19, MLX5_EVENT_TYPE_CODING_DCBX_CHANGE_EVENT = 0x1e, MLX5_EVENT_TYPE_CODING_PPS_EVENT = 0x25, @@ -83,6 +85,24 @@ MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, }; +enum { + MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b, + MLX5_OBJ_TYPE_MKEY = 0xff01, + MLX5_OBJ_TYPE_QP = 0xff02, + MLX5_OBJ_TYPE_PSV = 0xff03, + MLX5_OBJ_TYPE_RMP = 0xff04, + MLX5_OBJ_TYPE_XRC_SRQ = 0xff05, + MLX5_OBJ_TYPE_RQ = 0xff06, + MLX5_OBJ_TYPE_SQ = 0xff07, + MLX5_OBJ_TYPE_TIR = 0xff08, + MLX5_OBJ_TYPE_TIS = 0xff09, + MLX5_OBJ_TYPE_DCT = 0xff0a, + MLX5_OBJ_TYPE_XRQ = 0xff0b, + MLX5_OBJ_TYPE_RQT = 0xff0e, + MLX5_OBJ_TYPE_FLOW_COUNTER = 0xff0f, + MLX5_OBJ_TYPE_CQ = 0xff10, +}; + enum { MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, MLX5_CMD_OP_QUERY_ADAPTER = 0x101, @@ -140,6 +160,16 @@ MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION = 0x714, MLX5_CMD_OP_SET_DC_CNAK_TRACE = 0x715, MLX5_CMD_OP_QUERY_DC_CNAK_TRACE = 0x716, + MLX5_CMD_OP_CREATE_XRQ = 0x717, + MLX5_CMD_OP_DESTROY_XRQ = 0x718, + MLX5_CMD_OP_QUERY_XRQ = 0x719, + MLX5_CMD_OP_ARM_XRQ = 0x71a, + MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725, + MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726, + MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, + MLX5_CMD_OP_RELEASE_XRQ_ERROR = 0x729, + MLX5_CMD_OP_MODIFY_XRQ = 0x72a, + MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -246,8 +276,12 @@ MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, - MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d, - MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e, + MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT = 0x93f, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, + MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942, MLX5_CMD_OP_FPGA_CREATE_QP = 0x960, MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961, MLX5_CMD_OP_FPGA_QUERY_QP = 0x962, @@ -257,7 +291,16 @@ MLX5_CMD_OP_MODIFY_GENERAL_OBJ = 0xa01, MLX5_CMD_OP_QUERY_GENERAL_OBJ = 0xa02, MLX5_CMD_OP_DESTROY_GENERAL_OBJ = 0xa03, + MLX5_CMD_OP_CREATE_UCTX = 0xa04, + MLX5_CMD_OP_DESTROY_UCTX = 0xa06, + MLX5_CMD_OP_CREATE_UMEM = 0xa08, + MLX5_CMD_OP_DESTROY_UMEM = 0xa0a, +}; +/* Valid range for general commands that don't work over an object */ +enum { + MLX5_CMD_OP_GENERAL_START = 0xb00, + MLX5_CMD_OP_GENERAL_END = 0xd00, }; enum { @@ -947,6 +990,12 @@ u8 reserved_6[0x700]; }; +struct mlx5_ifc_device_event_cap_bits { + u8 user_affiliated_events[4][0x40]; + + u8 user_unaffiliated_events[4][0x40]; +}; + enum { MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x1, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2, @@ -1043,6 +1092,11 @@ MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_ENABLED = 0x3, }; +enum { + MLX5_UCTX_CAP_RAW_TX = 1UL << 0, + MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1, +}; + enum { MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING = 0x0, MLX5_SQ_TIMESTAMP_FORMAT_CAP_REAL_TIME = 0x1, @@ -1060,7 +1114,8 @@ u8 log_max_srq_sz[0x8]; u8 
log_max_qp_sz[0x8]; - u8 reserved_1[0xb]; + u8 event_cap[0x1]; + u8 reserved_1[0xa]; u8 log_max_qp[0x5]; u8 reserved_2[0xb]; @@ -1338,7 +1393,14 @@ u8 cqe_compression_timeout[0x10]; u8 cqe_compression_max_num[0x10]; - u8 reserved_69[0x220]; + u8 reserved_5e0[0xc0]; + + u8 uctx_cap[0x20]; + + u8 reserved_6c0[0xc0]; + + u8 vhca_tunnel_commands[0x40]; + u8 reserved_at_7c0[0x40]; }; enum mlx5_flow_destination_type { @@ -1436,7 +1498,9 @@ u8 reserved_6[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_7[0x15]; + u8 dbr_umem_valid[0x1]; + u8 wq_umem_valid[0x1]; + u8 reserved_7[0x13]; u8 single_wqe_log_num_of_strides[0x3]; u8 two_byte_shift_en[0x1]; u8 reserved_8[0x4]; @@ -2117,20 +2181,10 @@ u8 dc_access_key[0x40]; - u8 rdma_active[0x1]; - u8 comm_est[0x1]; - u8 suspended[0x1]; - u8 reserved_31[0x5]; - u8 send_msg_psn[0x18]; - - u8 reserved_32[0x8]; - u8 rcv_msg_psn[0x18]; + u8 reserved_at_680[0x3]; + u8 dbr_umem_valid[0x1]; - u8 rdma_va[0x40]; - - u8 rdma_key[0x20]; - - u8 reserved_33[0x20]; + u8 reserved_at_684[0xbc]; }; struct mlx5_ifc_roce_addr_layout_bits { @@ -2222,7 +2276,8 @@ u8 xrcd[0x18]; u8 page_offset[0x6]; - u8 reserved_2[0x2]; + u8 reserved_at_46[0x1]; + u8 dbr_umem_valid[0x1]; u8 cqn[0x18]; u8 reserved_3[0x20]; @@ -2643,6 +2698,9 @@ MLX5_ACCESS_MODE_PA = 0x0, MLX5_ACCESS_MODE_MTT = 0x1, MLX5_ACCESS_MODE_KLM = 0x2, + MLX5_ACCESS_MODE_KSM = 0x3, + MLX5_ACCESS_MODE_SW_ICM = 0x4, + MLX5_ACCESS_MODE_MEMIC = 0x5, }; struct mlx5_ifc_mkc_bits { @@ -3003,7 +3061,9 @@ struct mlx5_ifc_cqc_bits { u8 status[0x4]; - u8 reserved_0[0x4]; + u8 reserved_at_4[0x2]; + u8 dbr_umem_valid[0x1]; + u8 reserved_at_7[0x1]; u8 cqe_sz[0x3]; u8 cc[0x1]; u8 reserved_1[0x1]; @@ -3117,6 +3177,54 @@ u8 crc16[0x10]; }; +enum { + MLX5_XRQC_STATE_GOOD = 0x0, + MLX5_XRQC_STATE_ERROR = 0x1, +}; + +enum { + MLX5_XRQC_TOPOLOGY_NO_SPECIAL_TOPOLOGY = 0x0, + MLX5_XRQC_TOPOLOGY_TAG_MATCHING = 0x1, +}; + +enum { + MLX5_XRQC_OFFLOAD_RNDV = 0x1, +}; + +struct mlx5_ifc_tag_matching_topology_context_bits { + u8 log_matching_list_sz[0x4]; + u8 reserved_at_4[0xc]; + u8 append_next_index[0x10]; + + u8 sw_phase_cnt[0x10]; + u8 hw_phase_cnt[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_xrqc_bits { + u8 state[0x4]; + u8 rlkey[0x1]; + u8 reserved_at_5[0xf]; + u8 topology[0x4]; + u8 reserved_at_18[0x4]; + u8 offload[0x4]; + + u8 reserved_at_20[0x8]; + u8 user_index[0x18]; + + u8 reserved_at_40[0x8]; + u8 cqn[0x18]; + + u8 reserved_at_60[0xa0]; + + struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; + + u8 reserved_at_180[0x280]; + + struct mlx5_ifc_wq_bits wq; +}; + struct mlx5_ifc_nodnic_port_config_reg_bits { struct mlx5_ifc_nodnic_event_word_bits event; @@ -3522,7 +3630,7 @@ struct mlx5_ifc_sqd2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -3956,6 +4064,30 @@ u8 reserved_5[0x80]; }; +struct mlx5_ifc_query_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_xrqc_bits xrq_context; +}; + +struct mlx5_ifc_query_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_resume_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; @@ -3995,7 +4127,7 @@ struct mlx5_ifc_query_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -5070,6 +5202,93 @@ u8 
reserved_2[0x40]; }; +struct mlx5_ifc_packet_reformat_context_in_bits { + u8 reserved_at_0[0x5]; + u8 reformat_type[0x3]; + u8 reserved_at_8[0xe]; + u8 reformat_data_size[0xa]; + + u8 reserved_at_20[0x10]; + u8 reformat_data[2][0x8]; + + u8 more_reformat_data[0][0x8]; +}; + +struct mlx5_ifc_query_packet_reformat_context_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0xa0]; + + struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[0]; +}; + +struct mlx5_ifc_query_packet_reformat_context_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 packet_reformat_id[0x20]; + + u8 reserved_at_60[0xa0]; +}; + +struct mlx5_ifc_alloc_packet_reformat_context_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 packet_reformat_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +enum mlx5_reformat_ctx_type { + MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0, + MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1, + MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2, + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3, + MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4, +}; + +struct mlx5_ifc_alloc_packet_reformat_context_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0xa0]; + + struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context; +}; + +struct mlx5_ifc_dealloc_packet_reformat_context_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_dealloc_packet_reformat_context_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_20[0x10]; + u8 op_mod[0x10]; + + u8 packet_reformat_id[0x20]; + + u8 reserved_60[0x20]; +}; + struct mlx5_ifc_query_cq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; @@ -5396,7 +5615,7 @@ struct mlx5_ifc_modify_tis_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -5430,7 +5649,7 @@ struct mlx5_ifc_modify_tir_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -5458,7 +5677,7 @@ struct mlx5_ifc_modify_sq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -5528,7 +5747,7 @@ struct mlx5_ifc_modify_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -5561,7 +5780,7 @@ struct mlx5_ifc_modify_rq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -5597,7 +5816,7 @@ struct mlx5_ifc_modify_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -5807,7 +6026,7 @@ struct mlx5_ifc_modify_cq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -5819,7 +6038,12 @@ struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_3[0x600]; + u8 reserved_at_280[0x60]; + + u8 cq_umem_valid[0x1]; + u8 reserved_at_2e1[0x1f]; + + u8 reserved_at_300[0x580]; u8 pas[0][0x40]; }; @@ -6162,7 +6386,7 @@ struct mlx5_ifc_detach_from_mcg_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -6186,7 +6410,7 @@ struct mlx5_ifc_destroy_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -6208,7 +6432,7 @@ struct mlx5_ifc_destroy_tis_in_bits { u8 
opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -6230,7 +6454,7 @@ struct mlx5_ifc_destroy_tir_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -6252,7 +6476,7 @@ struct mlx5_ifc_destroy_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -6324,7 +6548,7 @@ struct mlx5_ifc_destroy_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -6609,7 +6833,7 @@ struct mlx5_ifc_destroy_cq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -6712,7 +6936,7 @@ struct mlx5_ifc_dealloc_xrcd_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -6756,7 +6980,7 @@ struct mlx5_ifc_dealloc_transport_domain_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -6878,7 +7102,7 @@ struct mlx5_ifc_dealloc_pd_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -6911,6 +7135,30 @@ u8 reserved_3[0x20]; }; +struct mlx5_ifc_create_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_create_xrq_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_xrqc_bits xrq_context; +}; + struct mlx5_ifc_deactivate_tracer_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; @@ -6946,7 +7194,7 @@ struct mlx5_ifc_create_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -6955,7 +7203,12 @@ struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; - u8 reserved_3[0x600]; + u8 reserved_at_280[0x60]; + + u8 xrc_srq_umem_valid[0x1]; + u8 reserved_at_2e1[0x1f]; + + u8 reserved_at_300[0x580]; u8 pas[0][0x40]; }; @@ -6974,7 +7227,7 @@ struct mlx5_ifc_create_tis_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -6998,7 +7251,7 @@ struct mlx5_ifc_create_tir_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -7022,7 +7275,7 @@ struct mlx5_ifc_create_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -7108,7 +7361,7 @@ struct mlx5_ifc_create_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -7156,7 +7409,7 @@ struct mlx5_ifc_create_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -7196,7 +7449,10 @@ struct mlx5_ifc_qpc_bits qpc; - u8 reserved_5[0x80]; + u8 reserved_at_800[0x60]; + + u8 wq_umem_valid[0x1]; + u8 reserved_at_861[0x1f]; u8 pas[0][0x40]; }; @@ -7282,7 +7538,8 @@ u8 reserved_2[0x20]; u8 pg_access[0x1]; - u8 reserved_3[0x1f]; + u8 mkey_umem_valid[0x1]; + u8 reserved_at_62[0x1e]; struct mlx5_ifc_mkc_bits memory_key_mkey_entry; @@ -7478,7 +7735,7 @@ struct mlx5_ifc_create_cq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -7487,7 +7744,10 @@ struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_3[0x600]; + u8 reserved_at_280[0x60]; + + u8 cq_umem_valid[0x1]; + u8 reserved_at_2e1[0x59f]; u8 
pas[0][0x40]; }; @@ -7535,7 +7795,7 @@ struct mlx5_ifc_attach_to_mcg_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -7548,6 +7808,29 @@ u8 multicast_gid[16][0x8]; }; +struct mlx5_ifc_arm_xrq_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_arm_xrq_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 xrqn[0x18]; + + u8 reserved_at_60[0x10]; + u8 lwm[0x10]; +}; + struct mlx5_ifc_arm_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; @@ -7563,7 +7846,7 @@ struct mlx5_ifc_arm_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -7590,7 +7873,7 @@ struct mlx5_ifc_arm_rq_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -7638,7 +7921,7 @@ struct mlx5_ifc_alloc_xrcd_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -7682,7 +7965,7 @@ struct mlx5_ifc_alloc_transport_domain_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -7704,7 +7987,7 @@ struct mlx5_ifc_alloc_q_counter_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -7726,7 +8009,7 @@ struct mlx5_ifc_alloc_pd_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 uid[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; @@ -7736,24 +8019,24 @@ struct mlx5_ifc_alloc_flow_counter_out_bits { u8 status[0x8]; - u8 reserved_0[0x18]; + u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_1[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; - u8 reserved_2[0x20]; + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_alloc_flow_counter_in_bits { u8 opcode[0x10]; - u8 reserved_0[0x10]; + u8 reserved_at_10[0x10]; - u8 reserved_1[0x10]; + u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_2[0x40]; + u8 reserved_at_40[0x38]; + u8 flow_counter_bulk[0x8]; }; struct mlx5_ifc_add_vxlan_udp_dport_out_bits { @@ -7810,7 +8093,7 @@ struct mlx5_ifc_set_rate_limit_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -10847,4 +11130,147 @@ u8 sensor_name_lo[0x20]; }; +struct mlx5_ifc_general_obj_in_cmd_hdr_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 vhca_tunnel_id[0x10]; + u8 obj_type[0x10]; + + u8 obj_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_general_obj_out_cmd_hdr_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 obj_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_umem_bits { + u8 reserved_at_0[0x80]; + + u8 reserved_at_80[0x1b]; + u8 log_page_size[0x5]; + + u8 page_offset[0x20]; + + u8 num_of_mtt[0x40]; + + struct mlx5_ifc_mtt_bits mtt[0]; +}; + +struct mlx5_ifc_uctx_bits { + u8 cap[0x20]; + + u8 reserved_at_20[0x160]; +}; + +struct mlx5_ifc_create_umem_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_umem_bits umem; +}; + +struct mlx5_ifc_create_uctx_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_uctx_bits uctx; +}; + +struct mlx5_ifc_destroy_uctx_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 
reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 uid[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_mtrc_string_db_param_bits { + u8 string_db_base_address[0x20]; + + u8 reserved_at_20[0x8]; + u8 string_db_size[0x18]; +}; + +struct mlx5_ifc_mtrc_cap_bits { + u8 trace_owner[0x1]; + u8 trace_to_memory[0x1]; + u8 reserved_at_2[0x4]; + u8 trc_ver[0x2]; + u8 reserved_at_8[0x14]; + u8 num_string_db[0x4]; + + u8 first_string_trace[0x8]; + u8 num_string_trace[0x8]; + u8 reserved_at_30[0x28]; + + u8 log_max_trace_buffer_size[0x8]; + + u8 reserved_at_60[0x20]; + + struct mlx5_ifc_mtrc_string_db_param_bits string_db_param[8]; + + u8 reserved_at_280[0x180]; +}; + +struct mlx5_ifc_mtrc_conf_bits { + u8 reserved_at_0[0x1c]; + u8 trace_mode[0x4]; + u8 reserved_at_20[0x18]; + u8 log_trace_buffer_size[0x8]; + u8 trace_mkey[0x20]; + u8 reserved_at_60[0x3a0]; +}; + +struct mlx5_ifc_mtrc_stdb_bits { + u8 string_db_index[0x4]; + u8 reserved_at_4[0x4]; + u8 read_size[0x18]; + u8 start_offset[0x20]; + u8 string_db_data[0]; +}; + +struct mlx5_ifc_mtrc_ctrl_bits { + u8 trace_status[0x2]; + u8 reserved_at_2[0x2]; + u8 arm_event[0x1]; + u8 reserved_at_5[0xb]; + u8 modify_field_select[0x10]; + u8 reserved_at_20[0x2b]; + u8 current_timestamp52_32[0x15]; + u8 current_timestamp31_0[0x20]; + u8 reserved_at_80[0x180]; +}; + +struct mlx5_ifc_affiliated_event_header_bits { + u8 reserved_at_0[0x10]; + u8 obj_type[0x10]; + + u8 obj_id[0x20]; +}; + #endif /* MLX5_IFC_H */ diff --git a/sys/dev/mlx5/qp.h b/sys/dev/mlx5/qp.h --- a/sys/dev/mlx5/qp.h +++ b/sys/dev/mlx5/qp.h @@ -563,7 +563,7 @@ return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); } -static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) +static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) { return radix_tree_lookup(&dev->priv.mr_table.tree, key); } diff --git a/sys/dev/mlx5/srq.h b/sys/dev/mlx5/srq.h --- a/sys/dev/mlx5/srq.h +++ b/sys/dev/mlx5/srq.h @@ -51,6 +51,7 @@ u32 user_index; u64 db_record; u64 *pas; + u16 uid; }; struct mlx5_core_dev; diff --git a/sys/dev/mthca/mthca_dev.h b/sys/dev/mthca/mthca_dev.h --- a/sys/dev/mthca/mthca_dev.h +++ b/sys/dev/mthca/mthca_dev.h @@ -512,7 +512,8 @@ void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe); int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, - struct ib_srq_attr *attr, struct mthca_srq *srq); + struct ib_srq_attr *attr, struct mthca_srq *srq, + struct ib_udata *udata); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); @@ -549,7 +550,8 @@ enum ib_qp_type type, enum ib_sig_type send_policy, struct ib_qp_cap *cap, - struct mthca_qp *qp); + struct mthca_qp *qp, + struct ib_udata *udata); int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, @@ -558,7 +560,8 @@ struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp); + struct mthca_sqp *sqp, + struct ib_udata *udata); void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp); int mthca_create_ah(struct mthca_dev *dev, struct mthca_pd *pd, diff --git a/sys/dev/mthca/mthca_mad.c b/sys/dev/mthca/mthca_mad.c --- a/sys/dev/mthca/mthca_mad.c +++ b/sys/dev/mthca/mthca_mad.c @@ -87,13 +87,13 @@ ah_attr.port_num = port_num; new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, - &ah_attr); + &ah_attr, 0); if (IS_ERR(new_ah)) 
return; spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) - ib_destroy_ah(dev->sm_ah[port_num - 1]); + ib_destroy_ah(dev->sm_ah[port_num - 1], 0); dev->sm_ah[port_num - 1] = new_ah; spin_unlock_irqrestore(&dev->sm_lock, flags); } @@ -344,6 +344,7 @@ } if (dev->sm_ah[p]) - ib_destroy_ah(dev->sm_ah[p]); + ib_destroy_ah(dev->sm_ah[p], + RDMA_DESTROY_AH_SLEEPABLE); } } diff --git a/sys/dev/mthca/mthca_provider.c b/sys/dev/mthca/mthca_provider.c --- a/sys/dev/mthca/mthca_provider.c +++ b/sys/dev/mthca/mthca_provider.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -298,17 +299,16 @@ return err; } -static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +static int mthca_alloc_ucontext(struct ib_ucontext *uctx, + struct ib_udata *udata) { - struct mthca_alloc_ucontext_resp uresp; - struct mthca_ucontext *context; + struct ib_device *ibdev = uctx->device; + struct mthca_alloc_ucontext_resp uresp = {}; + struct mthca_ucontext *context = to_mucontext(uctx); int err; if (!(to_mdev(ibdev)->active)) - return ERR_PTR(-EAGAIN); - - memset(&uresp, 0, sizeof uresp); + return -EAGAIN; uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; if (mthca_is_memfree(to_mdev(ibdev))) @@ -316,44 +316,33 @@ else uresp.uarc_size = 0; - context = kmalloc(sizeof *context, GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); - err = mthca_uar_alloc(to_mdev(ibdev), &context->uar); - if (err) { - kfree(context); - return ERR_PTR(err); - } + if (err) + return err; context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev)); if (IS_ERR(context->db_tab)) { err = PTR_ERR(context->db_tab); mthca_uar_free(to_mdev(ibdev), &context->uar); - kfree(context); - return ERR_PTR(err); + return err; } - if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { + if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab); mthca_uar_free(to_mdev(ibdev), &context->uar); - kfree(context); - return ERR_PTR(-EFAULT); + return -EFAULT; } context->reg_mr_warned = 0; - return &context->ibucontext; + return 0; } -static int mthca_dealloc_ucontext(struct ib_ucontext *context) +static void mthca_dealloc_ucontext(struct ib_ucontext *context) { mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar, to_mucontext(context)->db_tab); mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar); - kfree(to_mucontext(context)); - - return 0; } static int mthca_mmap_uar(struct ib_ucontext *context, @@ -372,150 +361,115 @@ return 0; } -static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) +static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { - struct mthca_pd *pd; + struct ib_device *ibdev = ibpd->device; + struct mthca_pd *pd = to_mpd(ibpd); int err; - pd = kmalloc(sizeof *pd, GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); - - err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); - if (err) { - kfree(pd); - return ERR_PTR(err); - } + err = mthca_pd_alloc(to_mdev(ibdev), !udata, pd); + if (err) + return err; - if (context) { + if (udata) { if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) { mthca_pd_free(to_mdev(ibdev), pd); - kfree(pd); - return ERR_PTR(-EFAULT); + return -EFAULT; } } - return &pd->ibpd; + return 0; } -static int mthca_dealloc_pd(struct ib_pd *pd) +static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { mthca_pd_free(to_mdev(pd->device), 
to_mpd(pd)); - kfree(pd); - - return 0; } -static struct ib_ah *mthca_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr, - struct ib_udata *udata) -{ - int err; - struct mthca_ah *ah; - - ah = kmalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); +static int mthca_ah_create(struct ib_ah *ibah, + struct ib_ah_attr *init_attr, u32 flags, + struct ib_udata *udata) - err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah); - if (err) { - kfree(ah); - return ERR_PTR(err); - } +{ + struct mthca_ah *ah = to_mah(ibah); - return &ah->ibah; + return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd), + init_attr, ah); } -static int mthca_ah_destroy(struct ib_ah *ah) +static void mthca_ah_destroy(struct ib_ah *ah, u32 flags) { mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); - kfree(ah); - - return 0; } -static struct ib_srq *mthca_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +static int mthca_create_srq(struct ib_srq *ibsrq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { struct mthca_create_srq ucmd; - struct mthca_ucontext *context = NULL; - struct mthca_srq *srq; + struct mthca_ucontext *context = rdma_udata_to_drv_context( + udata, struct mthca_ucontext, ibucontext); + struct mthca_srq *srq = to_msrq(ibsrq); int err; if (init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-ENOSYS); - - srq = kmalloc(sizeof *srq, GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); - - if (pd->uobject) { - context = to_mucontext(pd->uobject->context); + return -EOPNOTSUPP; - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { - err = -EFAULT; - goto err_free; - } + if (udata) { + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; - err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + err = mthca_map_user_db(to_mdev(ibsrq->device), &context->uar, context->db_tab, ucmd.db_index, ucmd.db_page); if (err) - goto err_free; + return err; srq->mr.ibmr.lkey = ucmd.lkey; srq->db_index = ucmd.db_index; } - err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), - &init_attr->attr, srq); + err = mthca_alloc_srq(to_mdev(ibsrq->device), to_mpd(ibsrq->pd), + &init_attr->attr, srq, udata); - if (err && pd->uobject) - mthca_unmap_user_db(to_mdev(pd->device), &context->uar, + if (err && udata) + mthca_unmap_user_db(to_mdev(ibsrq->device), &context->uar, context->db_tab, ucmd.db_index); if (err) - goto err_free; + return err; - if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) { - mthca_free_srq(to_mdev(pd->device), srq); - err = -EFAULT; - goto err_free; + if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { + mthca_free_srq(to_mdev(ibsrq->device), srq); + return -EFAULT; } - return &srq->ibsrq; - -err_free: - kfree(srq); - - return ERR_PTR(err); + return 0; } -static int mthca_destroy_srq(struct ib_srq *srq) +static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { - struct mthca_ucontext *context; - - if (srq->uobject) { - context = to_mucontext(srq->uobject->context); + if (udata) { + struct mthca_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mthca_ucontext, + ibucontext); mthca_unmap_user_db(to_mdev(srq->device), &context->uar, context->db_tab, to_msrq(srq)->db_index); } mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); - kfree(srq); - - return 0; } static struct ib_qp *mthca_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { + struct mthca_ucontext *context = 
rdma_udata_to_drv_context( + udata, struct mthca_ucontext, ibucontext); struct mthca_create_qp ucmd; struct mthca_qp *qp; int err; @@ -528,15 +482,11 @@ case IB_QPT_UC: case IB_QPT_UD: { - struct mthca_ucontext *context; - - qp = kmalloc(sizeof *qp, GFP_KERNEL); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - if (pd->uobject) { - context = to_mucontext(pd->uobject->context); - + if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { kfree(qp); return ERR_PTR(-EFAULT); @@ -571,11 +521,9 @@ to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), init_attr->qp_type, init_attr->sq_sig_type, - &init_attr->cap, qp); - - if (err && pd->uobject) { - context = to_mucontext(pd->uobject->context); + &init_attr->cap, qp, udata); + if (err && udata) { mthca_unmap_user_db(to_mdev(pd->device), &context->uar, context->db_tab, @@ -592,11 +540,7 @@ case IB_QPT_SMI: case IB_QPT_GSI: { - /* Don't allow userspace to create special QPs */ - if (pd->uobject) - return ERR_PTR(-EINVAL); - - qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); @@ -607,7 +551,7 @@ to_mcq(init_attr->recv_cq), init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, - to_msqp(qp)); + to_msqp(qp), udata); break; } default: @@ -629,64 +573,68 @@ return &qp->ibqp; } -static int mthca_destroy_qp(struct ib_qp *qp) +static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { - if (qp->uobject) { + if (udata) { + struct mthca_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mthca_ucontext, + ibucontext); + mthca_unmap_user_db(to_mdev(qp->device), - &to_mucontext(qp->uobject->context)->uar, - to_mucontext(qp->uobject->context)->db_tab, + &context->uar, + context->db_tab, to_mqp(qp)->sq.db_index); mthca_unmap_user_db(to_mdev(qp->device), - &to_mucontext(qp->uobject->context)->uar, - to_mucontext(qp->uobject->context)->db_tab, + &context->uar, + context->db_tab, to_mqp(qp)->rq.db_index); } mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); - kfree(qp); + kfree(to_mqp(qp)); return 0; } -static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata) +static int mthca_create_cq(struct ib_cq *ibcq, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; struct mthca_create_cq ucmd; struct mthca_cq *cq; int nent; int err; + struct mthca_ucontext *context = rdma_udata_to_drv_context( + udata, struct mthca_ucontext, ibucontext); if (attr->flags) - return ERR_PTR(-EINVAL); + return -EOPNOTSUPP; if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) - return ERR_PTR(-EINVAL); + return -EINVAL; - if (context) { - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) - return ERR_PTR(-EFAULT); + if (udata) { + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; - err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab, - ucmd.set_db_index, ucmd.set_db_page); + err = mthca_map_user_db(to_mdev(ibdev), &context->uar, + context->db_tab, ucmd.set_db_index, + ucmd.set_db_page); if (err) - return ERR_PTR(err); + return err; - err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab, - ucmd.arm_db_index, ucmd.arm_db_page); + err = mthca_map_user_db(to_mdev(ibdev), &context->uar, + context->db_tab, ucmd.arm_db_index, + 
ucmd.arm_db_page); if (err) goto err_unmap_set; } - cq = kmalloc(sizeof *cq, GFP_KERNEL); - if (!cq) { - err = -ENOMEM; - goto err_unmap_arm; - } + cq = to_mcq(ibcq); - if (context) { + if (udata) { cq->buf.mr.ibmr.lkey = ucmd.lkey; cq->set_ci_db_index = ucmd.set_db_index; cq->arm_db_index = ucmd.arm_db_index; @@ -695,37 +643,33 @@ for (nent = 1; nent <= entries; nent <<= 1) ; /* nothing */ - err = mthca_init_cq(to_mdev(ibdev), nent, - context ? to_mucontext(context) : NULL, - context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, + err = mthca_init_cq(to_mdev(ibdev), nent, context, + udata ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, cq); if (err) - goto err_free; + goto err_unmap_arm; - if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { + if (udata && ib_copy_to_udata(udata, &cq->cqn, sizeof(__u32))) { mthca_free_cq(to_mdev(ibdev), cq); err = -EFAULT; - goto err_free; + goto err_unmap_arm; } cq->resize_buf = NULL; - return &cq->ibcq; - -err_free: - kfree(cq); + return 0; err_unmap_arm: - if (context) - mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab, ucmd.arm_db_index); + if (udata) + mthca_unmap_user_db(to_mdev(ibdev), &context->uar, + context->db_tab, ucmd.arm_db_index); err_unmap_set: - if (context) - mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab, ucmd.set_db_index); + if (udata) + mthca_unmap_user_db(to_mdev(ibdev), &context->uar, + context->db_tab, ucmd.set_db_index); - return ERR_PTR(err); + return err; } static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq, @@ -849,22 +793,25 @@ return ret; } -static int mthca_destroy_cq(struct ib_cq *cq) +static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { - if (cq->uobject) { + if (udata) { + struct mthca_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mthca_ucontext, + ibucontext); + mthca_unmap_user_db(to_mdev(cq->device), - &to_mucontext(cq->uobject->context)->uar, - to_mucontext(cq->uobject->context)->db_tab, + &context->uar, + context->db_tab, to_mcq(cq)->arm_db_index); mthca_unmap_user_db(to_mdev(cq->device), - &to_mucontext(cq->uobject->context)->uar, - to_mucontext(cq->uobject->context)->db_tab, + &context->uar, + context->db_tab, to_mcq(cq)->set_ci_db_index); } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); - kfree(cq); - - return 0; } static inline u32 convert_access(int acc) @@ -999,13 +946,12 @@ return ERR_PTR(err); } -static int mthca_dereg_mr(struct ib_mr *mr) +static int mthca_dereg_mr(struct ib_mr *mr, struct ib_udata *udata) { struct mthca_mr *mmr = to_mmr(mr); mthca_free_mr(to_mdev(mr->device), mmr); - if (mmr->umem) - ib_umem_release(mmr->umem); + ib_umem_release(mmr->umem); kfree(mmr); return 0; @@ -1196,6 +1142,13 @@ if (ret) return ret; +#define mthca_ib_ah mthca_ah +#define mthca_ib_cq mthca_cq +#define mthca_ib_pd mthca_pd +#define mthca_ib_qp mthca_qp +#define mthca_ib_srq mthca_srq +#define mthca_ib_ucontext mthca_ucontext + INIT_IB_DEVICE_OPS(&dev->ib_dev.ops, mthca, MTHCA); strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; diff --git a/sys/dev/mthca/mthca_qp.c b/sys/dev/mthca/mthca_qp.c --- a/sys/dev/mthca/mthca_qp.c +++ b/sys/dev/mthca/mthca_qp.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "mthca_dev.h" #include "mthca_cmd.h" @@ -542,10 +543,14 @@ static int __mthca_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, - enum ib_qp_state cur_state, enum 
ib_qp_state new_state) + enum ib_qp_state cur_state, + enum ib_qp_state new_state, + struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); + struct mthca_ucontext *context = rdma_udata_to_drv_context( + udata, struct mthca_ucontext, ibucontext); struct mthca_mailbox *mailbox; struct mthca_qp_param *qp_param; struct mthca_qp_context *qp_context; @@ -607,8 +612,7 @@ /* leave arbel_sched_queue as 0 */ if (qp->ibqp.uobject) - qp_context->usr_page = - cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index); + qp_context->usr_page = cpu_to_be32(context->uar.index); else qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); qp_context->local_qpn = cpu_to_be32(qp->qpn); @@ -900,7 +904,8 @@ goto out; } - err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state, + udata); out: mutex_unlock(&qp->mutex); @@ -968,7 +973,8 @@ */ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, struct mthca_pd *pd, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int size; int err = -ENOMEM; @@ -1035,7 +1041,7 @@ * allocate anything. All we need is to calculate the WQE * sizes and the send_wqe_offset, so we're done now. */ - if (pd->ibpd.uobject) + if (udata) return 0; size = PAGE_ALIGN(qp->send_wqe_offset + @@ -1141,7 +1147,8 @@ struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int ret; int i; @@ -1164,7 +1171,7 @@ if (ret) return ret; - ret = mthca_alloc_wqe_buf(dev, pd, qp); + ret = mthca_alloc_wqe_buf(dev, pd, qp, udata); if (ret) { mthca_unmap_memfree(dev, qp); return ret; @@ -1177,7 +1184,7 @@ * will be allocated and buffers will be initialized in * userspace. 
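+	 * Under the updated kernel verbs API, a non-NULL udata (rather than
+	 * pd->ibpd.uobject) is what identifies a userspace caller, which is
+	 * why this check and the similar ones below now test udata.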
*/ - if (pd->ibpd.uobject) + if (udata) return 0; ret = mthca_alloc_memfree(dev, qp); @@ -1270,7 +1277,8 @@ enum ib_qp_type type, enum ib_sig_type send_policy, struct ib_qp_cap *cap, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int err; @@ -1293,7 +1301,7 @@ qp->port = 0; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, qp); + send_policy, qp, udata); if (err) { mthca_free(&dev->qp_table.alloc, qp->qpn); return err; @@ -1345,7 +1353,8 @@ struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp) + struct mthca_sqp *sqp, + struct ib_udata *udata) { u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; int err; @@ -1376,7 +1385,7 @@ sqp->qp.transport = MLX; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, &sqp->qp); + send_policy, &sqp->qp, udata); if (err) goto err_out_free; diff --git a/sys/dev/mthca/mthca_srq.c b/sys/dev/mthca/mthca_srq.c --- a/sys/dev/mthca/mthca_srq.c +++ b/sys/dev/mthca/mthca_srq.c @@ -36,6 +36,8 @@ #include +#include + #include "mthca_dev.h" #include "mthca_cmd.h" #include "mthca_memfree.h" @@ -95,17 +97,20 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_srq *srq, - struct mthca_tavor_srq_context *context) + struct mthca_tavor_srq_context *context, + struct ib_udata *udata) { + struct mthca_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mthca_ucontext, ibucontext); + memset(context, 0, sizeof *context); context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4)); context->state_pd = cpu_to_be32(pd->pd_num); context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); - if (pd->ibpd.uobject) - context->uar = - cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); + if (udata) + context->uar = cpu_to_be32(ucontext->uar.index); else context->uar = cpu_to_be32(dev->driver_uar.index); } @@ -113,8 +118,11 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_srq *srq, - struct mthca_arbel_srq_context *context) + struct mthca_arbel_srq_context *context, + struct ib_udata *udata) { + struct mthca_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mthca_ucontext, ibucontext); int logsize; memset(context, 0, sizeof *context); @@ -123,9 +131,8 @@ context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); context->db_index = cpu_to_be32(srq->db_index); context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29); - if (pd->ibpd.uobject) - context->logstride_usrpage |= - cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); + if (udata) + context->logstride_usrpage |= cpu_to_be32(ucontext->uar.index); else context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index); context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num); @@ -139,14 +146,14 @@ } static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, - struct mthca_srq *srq) + struct mthca_srq *srq, struct ib_udata *udata) { struct mthca_data_seg *scatter; void *wqe; int err; int i; - if (pd->ibpd.uobject) + if (udata) return 0; srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL); @@ -191,7 +198,8 @@ } int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, - struct ib_srq_attr *attr, struct mthca_srq *srq) + struct ib_srq_attr *attr, struct mthca_srq *srq, + struct ib_udata *udata) { struct mthca_mailbox *mailbox; int ds; @@ -229,7 +237,7 @@ if (err) goto err_out; - if (!pd->ibpd.uobject) { + if (!udata) { srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ, 
srq->srqn, &srq->db); if (srq->db_index < 0) { @@ -245,7 +253,7 @@ goto err_out_db; } - err = mthca_alloc_srq_buf(dev, pd, srq); + err = mthca_alloc_srq_buf(dev, pd, srq, udata); if (err) goto err_out_mailbox; @@ -255,9 +263,9 @@ mutex_init(&srq->mutex); if (mthca_is_memfree(dev)) - mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); + mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf, udata); else - mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); + mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf, udata); err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn); @@ -291,14 +299,14 @@ mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); err_out_free_buf: - if (!pd->ibpd.uobject) + if (!udata) mthca_free_srq_buf(dev, srq); err_out_mailbox: mthca_free_mailbox(dev, mailbox); err_out_db: - if (!pd->ibpd.uobject && mthca_is_memfree(dev)) + if (!udata && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); err_out_icm: diff --git a/sys/dev/qlnx/qlnxr/qlnxr_def.h b/sys/dev/qlnx/qlnxr/qlnxr_def.h --- a/sys/dev/qlnx/qlnxr/qlnxr_def.h +++ b/sys/dev/qlnx/qlnxr/qlnxr_def.h @@ -65,6 +65,7 @@ #include #include #include +#include #if __FreeBSD_version < 1100000 #undef MODULE_VERSION diff --git a/sys/dev/qlnx/qlnxr/qlnxr_os.c b/sys/dev/qlnx/qlnxr/qlnxr_os.c --- a/sys/dev/qlnx/qlnxr/qlnxr_os.c +++ b/sys/dev/qlnx/qlnxr/qlnxr_os.c @@ -156,6 +156,13 @@ ibdev = &dev->ibdev; +#define qlnxr_ib_ah qlnxr_ah +#define qlnxr_ib_cq qlnxr_cq +#define qlnxr_ib_pd qlnxr_pd +#define qlnxr_ib_qp qlnxr_qp +#define qlnxr_ib_srq qlnxr_srq +#define qlnxr_ib_ucontext qlnxr_ucontext + INIT_IB_DEVICE_OPS(&ibdev->ops, qlnxr, QLNXR); strlcpy(ibdev->name, "qlnxr%d", IB_DEVICE_NAME_MAX); memset(&ibdev->node_guid, 0, sizeof(ibdev->node_guid)); diff --git a/sys/dev/qlnx/qlnxr/qlnxr_verbs.h b/sys/dev/qlnx/qlnxr/qlnxr_verbs.h --- a/sys/dev/qlnx/qlnxr/qlnxr_verbs.h +++ b/sys/dev/qlnx/qlnxr/qlnxr_verbs.h @@ -40,11 +40,12 @@ int index, union ib_gid *gid); -extern struct ib_srq *qlnxr_create_srq(struct ib_pd *, +extern int qlnxr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *, struct ib_udata *); -extern int qlnxr_destroy_srq(struct ib_srq *); +extern void qlnxr_destroy_srq(struct ib_srq *, + struct ib_udata *); extern int qlnxr_modify_srq(struct ib_srq *, struct ib_srq_attr *, @@ -79,33 +80,15 @@ extern enum rdma_link_layer qlnxr_link_layer(struct ib_device *device, uint8_t port_num); -struct ib_pd *qlnxr_alloc_pd(struct ib_device *, - struct ib_ucontext *, - struct ib_udata *); +extern int qlnxr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *); -extern int qlnxr_dealloc_pd(struct ib_pd *pd); +extern void qlnxr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); -#if __FreeBSD_version >= 1102000 -extern struct ib_cq *qlnxr_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_ctx, - struct ib_udata *udata); -#else -#if __FreeBSD_version >= 1100000 -extern struct ib_cq *qlnxr_create_cq(struct ib_device *ibdev, - struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_ctx, - struct ib_udata *udata); -#else -extern struct ib_cq *qlnxr_create_cq(struct ib_device *ibdev, - int cqe, - int comp_vector, - struct ib_ucontext *ib_ctx, - struct ib_udata *udata); -#endif -#endif /* #if __FreeBSD_version >= 1102000 */ +extern int qlnxr_create_cq(struct ib_cq *ibcq, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata); -extern int qlnxr_destroy_cq(struct ib_cq *); +extern void qlnxr_destroy_cq(struct ib_cq *, struct ib_udata *); extern int 
qlnxr_resize_cq(struct ib_cq *, int cqe, @@ -129,22 +112,17 @@ int qp_attr_mask, struct ib_qp_init_attr *); -extern int qlnxr_destroy_qp(struct ib_qp *); +extern int qlnxr_destroy_qp(struct ib_qp *, struct ib_udata *); extern int qlnxr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); -#if __FreeBSD_version >= 1102000 -extern struct ib_ah *qlnxr_create_ah(struct ib_pd *ibpd, - struct ib_ah_attr *attr, struct ib_udata *udata); -#else -extern struct ib_ah *qlnxr_create_ah(struct ib_pd *ibpd, - struct ib_ah_attr *attr); -#endif /* #if __FreeBSD_version >= 1102000 */ - -extern int qlnxr_destroy_ah(struct ib_ah *ibah); +extern int qlnxr_create_ah(struct ib_ah *ibah, + struct ib_ah_attr *attr, u32 flags, + struct ib_udata *udata); +extern void qlnxr_destroy_ah(struct ib_ah *ibah, u32 flags); extern int qlnxr_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr); @@ -195,7 +173,7 @@ u64 *iova_start); #endif /* #if __FreeBSD_version < 1102000 */ -extern int qlnxr_dereg_mr(struct ib_mr *); +extern int qlnxr_dereg_mr(struct ib_mr *, struct ib_udata *); #if __FreeBSD_version >= 1102000 extern struct ib_mr *qlnxr_reg_user_mr(struct ib_pd *, @@ -217,7 +195,9 @@ #if __FreeBSD_version >= 1102000 extern struct ib_mr *qlnxr_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, u32 max_num_sg); + enum ib_mr_type mr_type, u32 max_num_sg, + struct ib_udata *udata); + extern int qlnxr_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); #else @@ -233,10 +213,10 @@ #endif /* #if __FreeBSD_version >= 1102000 */ -extern struct ib_ucontext *qlnxr_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata); +extern int qlnxr_alloc_ucontext(struct ib_ucontext *uctx, + struct ib_udata *udata); -extern int qlnxr_dealloc_ucontext(struct ib_ucontext *ibctx); +extern void qlnxr_dealloc_ucontext(struct ib_ucontext *ibctx); extern int qlnxr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); diff --git a/sys/dev/qlnx/qlnxr/qlnxr_verbs.c b/sys/dev/qlnx/qlnxr/qlnxr_verbs.c --- a/sys/dev/qlnx/qlnxr/qlnxr_verbs.c +++ b/sys/dev/qlnx/qlnxr/qlnxr_verbs.c @@ -71,8 +71,7 @@ ((unsigned char *)&addr)[3] static int -qlnxr_check_srq_params(struct ib_pd *ibpd, - struct qlnxr_dev *dev, +qlnxr_check_srq_params(struct qlnxr_dev *dev, struct ib_srq_init_attr *attrs); static int @@ -159,9 +158,10 @@ return 0; } -struct ib_srq * -qlnxr_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int +qlnxr_create_srq(struct ib_srq *ibsrq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { struct qlnxr_dev *dev; qlnx_host_t *ha; @@ -169,36 +169,28 @@ struct ecore_rdma_create_srq_out_params out_params; struct ecore_rdma_create_srq_in_params in_params; u64 pbl_base_addr, phy_prod_pair_addr; - struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); - struct ib_ucontext *ib_ctx = NULL; struct qlnxr_srq_hwq_info *hw_srq; - struct qlnxr_ucontext *ctx = NULL; + struct qlnxr_ucontext *ctx; struct qlnxr_create_srq_ureq ureq; u32 page_cnt, page_size; - struct qlnxr_srq *srq; + struct qlnxr_srq *srq = get_qlnxr_srq(ibsrq); int ret = 0; - dev = get_qlnxr_dev((ibpd->device)); + dev = get_qlnxr_dev(ibsrq->device); ha = dev->ha; QL_DPRINT12(ha, "enter\n"); - ret = qlnxr_check_srq_params(ibpd, dev, init_attr); - - srq = kzalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) { - QL_DPRINT11(ha, "cannot allocate memory for srq\n"); - return NULL; //@@@ : TODO what to return here? 
- } + ret = qlnxr_check_srq_params(dev, init_attr); srq->dev = dev; hw_srq = &srq->hw_srq; spin_lock_init(&srq->lock); memset(&in_params, 0, sizeof(in_params)); - if (udata && ibpd->uobject && ibpd->uobject->context) { - ib_ctx = ibpd->uobject->context; - ctx = get_qlnxr_ucontext(ib_ctx); + if (udata) { + ctx = rdma_udata_to_drv_context( + udata, struct qlnxr_ucontext, ibucontext); memset(&ureq, 0, sizeof(ureq)); if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), @@ -208,7 +200,7 @@ goto err0; } - ret = qlnxr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0); + ret = qlnxr_init_srq_user_params(&ctx->ibucontext, srq, &ureq, 0, 0); if (ret) goto err0; @@ -232,7 +224,7 @@ page_size = pbl->elem_per_page << 4; } - in_params.pd_id = pd->pd_id; + in_params.pd_id = get_qlnxr_pd(ibsrq->pd)->pd_id; in_params.pbl_base_addr = pbl_base_addr; in_params.prod_pair_addr = phy_prod_pair_addr; in_params.num_pages = page_cnt; @@ -251,7 +243,7 @@ } QL_DPRINT12(ha, "created srq with srq_id = 0x%0x\n", srq->srq_id); - return &srq->ibsrq; + return (0); err2: memset(&in_params, 0, sizeof(in_params)); destroy_in_params.srq_id = srq->srq_id; @@ -264,12 +256,11 @@ qlnxr_free_srq_kernel_params(srq); err0: - kfree(srq); - return ERR_PTR(-EFAULT); + return (-EFAULT); } -int -qlnxr_destroy_srq(struct ib_srq *ibsrq) +void +qlnxr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct qlnxr_dev *dev; struct qlnxr_srq *srq; @@ -291,8 +282,6 @@ qlnxr_free_srq_kernel_params(srq); QL_DPRINT12(ha, "destroyed srq_id=0x%0x\n", srq->srq_id); - kfree(srq); - return 0; } int @@ -718,11 +707,11 @@ return IB_LINK_LAYER_ETHERNET; } -struct ib_pd * -qlnxr_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, - struct ib_udata *udata) +int +qlnxr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { - struct qlnxr_pd *pd = NULL; + struct ib_device *ibdev = ibpd->device; + struct qlnxr_pd *pd = get_qlnxr_pd(ibpd); u16 pd_id; int rc; struct qlnxr_dev *dev; @@ -731,8 +720,7 @@ dev = get_qlnxr_dev(ibdev); ha = dev->ha; - QL_DPRINT12(ha, "ibdev = %p context = %p" - " udata = %p enter\n", ibdev, context, udata); + QL_DPRINT12(ha, "ibdev = %p udata = %p enter\n", ibdev, udata); if (dev->rdma_ctx == NULL) { QL_DPRINT11(ha, "dev->rdma_ctx = NULL\n"); @@ -740,13 +728,6 @@ goto err; } - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) { - rc = -ENOMEM; - QL_DPRINT11(ha, "kzalloc(pd) = NULL\n"); - goto err; - } - rc = ecore_rdma_alloc_pd(dev->rdma_ctx, &pd_id); if (rc) { QL_DPRINT11(ha, "ecore_rdma_alloc_pd failed\n"); @@ -755,7 +736,7 @@ pd->pd_id = pd_id; - if (udata && context) { + if (udata) { rc = ib_copy_to_udata(udata, &pd->pd_id, sizeof(pd->pd_id)); if (rc) { QL_DPRINT11(ha, "ib_copy_to_udata failed\n"); @@ -763,7 +744,8 @@ goto err; } - pd->uctx = get_qlnxr_ucontext(context); + pd->uctx = rdma_udata_to_drv_context( + udata, struct qlnxr_ucontext, ibucontext); pd->uctx->pd = pd; } @@ -771,16 +753,15 @@ QL_DPRINT12(ha, "exit [pd, pd_id, pd_count] = [%p, 0x%x, %d]\n", pd, pd_id, dev->pd_count); - return &pd->ibpd; + return (0); err: - kfree(pd); QL_DPRINT12(ha, "exit -1\n"); - return ERR_PTR(rc); + return (rc); } -int -qlnxr_dealloc_pd(struct ib_pd *ibpd) +void +qlnxr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct qlnxr_pd *pd; struct qlnxr_dev *dev; @@ -796,14 +777,12 @@ QL_DPRINT11(ha, "pd = NULL\n"); } else { ecore_rdma_free_pd(dev->rdma_ctx, pd->pd_id); - kfree(pd); atomic_subtract_rel_32(&dev->pd_count, 1); QL_DPRINT12(ha, "exit [pd, pd_id, pd_count] = [%p, 0x%x, %d]\n", pd, pd->pd_id, 
dev->pd_count); } QL_DPRINT12(ha, "exit\n"); - return 0; } #define ROCE_WQE_ELEM_SIZE sizeof(struct rdma_sq_sge) @@ -977,24 +956,18 @@ return found; } -struct -ib_ucontext *qlnxr_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) +int +qlnxr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { int rc; - struct qlnxr_ucontext *ctx; + struct qlnxr_ucontext *ctx = get_qlnxr_ucontext(uctx); struct qlnxr_alloc_ucontext_resp uresp; - struct qlnxr_dev *dev = get_qlnxr_dev(ibdev); + struct qlnxr_dev *dev = get_qlnxr_dev(uctx->device); qlnx_host_t *ha = dev->ha; struct ecore_rdma_add_user_out_params oparams; - if (!udata) { - return ERR_PTR(-EFAULT); - } - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return ERR_PTR(-ENOMEM); + if (!udata) + return -EFAULT; rc = ecore_rdma_add_user(dev->rdma_ctx, &oparams); if (rc) { @@ -1044,20 +1017,18 @@ QL_DPRINT12(ha, "Allocated user context %p\n", &ctx->ibucontext); - return &ctx->ibucontext; + return (0); err: - kfree(ctx); - return ERR_PTR(rc); + return (rc); } -int +void qlnxr_dealloc_ucontext(struct ib_ucontext *ibctx) { struct qlnxr_ucontext *uctx = get_qlnxr_ucontext(ibctx); struct qlnxr_dev *dev = uctx->dev; qlnx_host_t *ha = dev->ha; struct qlnxr_mm *mm, *tmp; - int status = 0; QL_DPRINT12(ha, "Deallocating user context %p\n", uctx); @@ -1073,8 +1044,6 @@ list_del(&mm->entry); kfree(mm); } - kfree(uctx); - return status; } int @@ -1662,7 +1631,7 @@ } int -qlnxr_dereg_mr(struct ib_mr *ib_mr) +qlnxr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct qlnxr_mr *mr = get_qlnxr_mr(ib_mr); struct qlnxr_dev *dev = get_qlnxr_dev((ib_mr->device)); @@ -1822,35 +1791,10 @@ return rc; } -#if __FreeBSD_version >= 1102000 - -struct ib_cq * -qlnxr_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_ctx, - struct ib_udata *udata) - -#else - -#if __FreeBSD_version >= 1100000 - -struct ib_cq * -qlnxr_create_cq(struct ib_device *ibdev, - struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_ctx, - struct ib_udata *udata) - -#else - -struct ib_cq * -qlnxr_create_cq(struct ib_device *ibdev, - int entries, - int vector, - struct ib_ucontext *ib_ctx, - struct ib_udata *udata) -#endif /* #if __FreeBSD_version >= 1100000 */ - -#endif /* #if __FreeBSD_version >= 1102000 */ +int +qlnxr_create_cq(struct ib_cq *ibcq, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { struct qlnxr_ucontext *ctx; struct ecore_rdma_destroy_cq_out_params destroy_oparams; @@ -1863,13 +1807,13 @@ int vector = attr->comp_vector; int entries = attr->cqe; #endif - struct qlnxr_cq *cq; + struct qlnxr_cq *cq = get_qlnxr_cq(ibcq); int chain_entries, rc, page_cnt; u64 pbl_ptr; u16 icid; qlnx_host_t *ha; - dev = get_qlnxr_dev(ibdev); + dev = get_qlnxr_dev(ibcq->device); ha = dev->ha; QL_DPRINT12(ha, "called from %s. entries = %d, " @@ -1885,17 +1829,15 @@ "the number of entries %d is too high. 
" "Must be equal or below %d.\n", entries, QLNXR_MAX_CQES); - return ERR_PTR(-EINVAL); + return -EINVAL; } chain_entries = qlnxr_align_cq_entries(entries); chain_entries = min_t(int, chain_entries, QLNXR_MAX_CQES); - cq = qlnx_zalloc((sizeof(struct qlnxr_cq))); - - if (!cq) - return ERR_PTR(-ENOMEM); - if (udata) { + ctx = rdma_udata_to_drv_context( + udata, struct qlnxr_ucontext, ibucontext); + memset(&ureq, 0, sizeof(ureq)); if (ib_copy_from_udata(&ureq, udata, @@ -1911,13 +1853,15 @@ cq->cq_type = QLNXR_CQ_TYPE_USER; - qlnxr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr, ureq.len, + qlnxr_init_user_queue(&ctx->ibucontext, dev, &cq->q, ureq.addr, ureq.len, IB_ACCESS_LOCAL_WRITE, 1, 1); pbl_ptr = cq->q.pbl_tbl->pa; page_cnt = cq->q.pbl_info.num_pbes; cq->ibcq.cqe = chain_entries; } else { + ctx = NULL; + cq->cq_type = QLNXR_CQ_TYPE_KERNEL; rc = ecore_chain_alloc(&dev->ha->cdev, @@ -1944,8 +1888,7 @@ params.pbl_ptr = pbl_ptr; params.pbl_two_level = 0; - if (ib_ctx != NULL) { - ctx = get_qlnxr_ucontext(ib_ctx); + if (udata) { params.dpi = ctx->dpi; } else { params.dpi = dev->dpi; @@ -1959,7 +1902,7 @@ cq->sig = QLNXR_CQ_MAGIC_NUMBER; spin_lock_init(&cq->cq_lock); - if (ib_ctx) { + if (udata) { rc = qlnxr_copy_cq_uresp(dev, cq, udata); if (rc) goto err3; @@ -1990,7 +1933,7 @@ " number of entries = 0x%x\n", cq->icid, cq, params.cq_size); QL_DPRINT12(ha,"cq_addr = %p\n", cq); - return &cq->ibcq; + return (0); err3: destroy_iparams.icid = cq->icid; @@ -2004,11 +1947,9 @@ if (udata) ib_umem_release(cq->q.umem); err0: - kfree(cq); - QL_DPRINT12(ha, "exit error\n"); - return ERR_PTR(-EINVAL); + return (-EINVAL); } int qlnxr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) @@ -2024,8 +1965,8 @@ return status; } -int -qlnxr_destroy_cq(struct ib_cq *ibcq) +void +qlnxr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct qlnxr_dev *dev = get_qlnxr_dev((ibcq->device)); struct ecore_rdma_destroy_cq_out_params oparams; @@ -2055,25 +1996,21 @@ if (rc) { QL_DPRINT12(ha, "ecore_rdma_destroy_cq failed cq_id = %d\n", cq->icid); - return rc; + return; } QL_DPRINT12(ha, "free cq->pbl cq_id = %d\n", cq->icid); ecore_chain_free(&dev->ha->cdev, &cq->pbl); } - if (ibcq->uobject && ibcq->uobject->context) { + if (udata) { qlnxr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); } cq->sig = ~cq->sig; - kfree(cq); - QL_DPRINT12(ha, "exit cq_id = %d\n", cq->icid); - - return rc; } static int @@ -2393,8 +2330,7 @@ } static int -qlnxr_check_srq_params(struct ib_pd *ibpd, - struct qlnxr_dev *dev, +qlnxr_check_srq_params(struct qlnxr_dev *dev, struct ib_srq_init_attr *attrs) { struct ecore_rdma_device *qattr; @@ -3281,8 +3217,6 @@ return &qp->ibqp; err: - kfree(qp); - QL_DPRINT12(ha, "failed exit\n"); return ERR_PTR(-EFAULT); } @@ -3987,9 +3921,9 @@ return; } -int +static int qlnxr_free_qp_resources(struct qlnxr_dev *dev, - struct qlnxr_qp *qp) + struct qlnxr_qp *qp, struct ib_udata *udata) { int rc = 0; qlnx_host_t *ha; @@ -4007,13 +3941,13 @@ return rc; } - if (qp->ibqp.uobject && qp->ibqp.uobject->context) + if (udata) qlnxr_cleanup_user(dev, qp); else qlnxr_cleanup_kernel(dev, qp); #endif - if (qp->ibqp.uobject && qp->ibqp.uobject->context) + if (udata) qlnxr_cleanup_user(dev, qp); else qlnxr_cleanup_kernel(dev, qp); @@ -4030,7 +3964,7 @@ } int -qlnxr_destroy_qp(struct ib_qp *ibqp) +qlnxr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct qlnxr_qp *qp = get_qlnxr_qp(ibqp); struct qlnxr_dev *dev = qp->dev; @@ -4060,12 +3994,11 @@ qp->sig = ~qp->sig; - 
qlnxr_free_qp_resources(dev, qp); + qlnxr_free_qp_resources(dev, qp, udata); if (atomic_dec_and_test(&qp->refcnt)) { /* TODO: only for iWARP? */ qlnxr_idr_remove(dev, qp->qp_id); - kfree(qp); } QL_DPRINT12(ha, "exit\n"); @@ -5837,7 +5770,8 @@ #if __FreeBSD_version >= 1102000 struct ib_mr * -qlnxr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, u32 max_num_sg) +qlnxr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct qlnxr_dev *dev; struct qlnxr_mr *mr; @@ -6266,48 +6200,28 @@ #endif /* #if __FreeBSD_version >= 1102000 */ -struct ib_ah * -#if __FreeBSD_version >= 1102000 -qlnxr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, +int +qlnxr_create_ah(struct ib_ah *ibah, + struct ib_ah_attr *attr, u32 flags, struct ib_udata *udata) -#else -qlnxr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) -#endif /* #if __FreeBSD_version >= 1102000 */ { struct qlnxr_dev *dev; qlnx_host_t *ha; - struct qlnxr_ah *ah; + struct qlnxr_ah *ah = get_qlnxr_ah(ibah); - dev = get_qlnxr_dev((ibpd->device)); + dev = get_qlnxr_dev(ibah->device); ha = dev->ha; QL_DPRINT12(ha, "in create_ah\n"); - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) { - QL_DPRINT12(ha, "no address handle can be allocated\n"); - return ERR_PTR(-ENOMEM); - } - ah->attr = *attr; - return &ah->ibah; + return (0); } -int -qlnxr_destroy_ah(struct ib_ah *ibah) +void +qlnxr_destroy_ah(struct ib_ah *ibah, u32 flags) { - struct qlnxr_dev *dev; - qlnx_host_t *ha; - struct qlnxr_ah *ah = get_qlnxr_ah(ibah); - - dev = get_qlnxr_dev((ibah->device)); - ha = dev->ha; - - QL_DPRINT12(ha, "in destroy_ah\n"); - - kfree(ah); - return 0; } int @@ -7187,7 +7101,6 @@ if (atomic_dec_and_test(&qp->refcnt)) { qlnxr_idr_remove(qp->dev, qp->qp_id); - kfree(qp); } QL_DPRINT12(ha, "exit \n"); diff --git a/sys/modules/ibcore/Makefile b/sys/modules/ibcore/Makefile --- a/sys/modules/ibcore/Makefile +++ b/sys/modules/ibcore/Makefile @@ -8,6 +8,7 @@ ib_cache.c \ ib_cm.c \ ib_cma.c \ + ib_core_uverbs.c \ ib_cq.c \ ib_device.c \ ib_fmr_pool.c \ @@ -18,6 +19,7 @@ ib_mad_rmpp.c \ ib_multicast.c \ ib_packer.c \ + ib_rdma_core.c \ ib_roce_gid_mgmt.c \ ib_sa_query.c \ ib_smi.c \ @@ -28,9 +30,20 @@ ib_umem.c \ ib_user_mad.c \ ib_uverbs_cmd.c \ + ib_uverbs_ioctl.c \ ib_uverbs_main.c \ ib_uverbs_marshall.c \ + ib_uverbs_std_types.c \ + ib_uverbs_std_types_async_fd.c \ + ib_uverbs_std_types_counters.c \ + ib_uverbs_std_types_cq.c \ + ib_uverbs_std_types_device.c \ + ib_uverbs_std_types_dm.c \ + ib_uverbs_std_types_flow_action.c \ + ib_uverbs_std_types_mr.c \ + ib_uverbs_uapi.c \ ib_verbs.c + SRCS+= ${LINUXKPI_GENSRCS} SRCS+= opt_inet.h opt_inet6.h diff --git a/sys/modules/mlx5/Makefile b/sys/modules/mlx5/Makefile --- a/sys/modules/mlx5/Makefile +++ b/sys/modules/mlx5/Makefile @@ -38,6 +38,7 @@ SRCS+= opt_inet.h opt_inet6.h opt_rss.h opt_ratelimit.h CFLAGS+= -I${SRCTOP}/sys/ofed/include +CFLAGS+= -I${SRCTOP}/sys/ofed/include/uapi CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include .if defined(CONFIG_BUILD_FPGA) diff --git a/sys/modules/mlx5en/Makefile b/sys/modules/mlx5en/Makefile --- a/sys/modules/mlx5en/Makefile +++ b/sys/modules/mlx5en/Makefile @@ -29,6 +29,7 @@ .endif CFLAGS+= -I${SRCTOP}/sys/ofed/include +CFLAGS+= -I${SRCTOP}/sys/ofed/include/uapi CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include .include diff --git a/sys/modules/mlx5fpga_tools/Makefile b/sys/modules/mlx5fpga_tools/Makefile --- a/sys/modules/mlx5fpga_tools/Makefile +++ b/sys/modules/mlx5fpga_tools/Makefile @@ -10,6 +10,7 @@ SRCS+= 
opt_inet.h opt_inet6.h opt_rss.h opt_ratelimit.h CFLAGS+= -I${SRCTOP}/sys/ofed/include +CFLAGS+= -I${SRCTOP}/sys/ofed/include/uapi CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include .include diff --git a/sys/modules/mlx5ib/Makefile b/sys/modules/mlx5ib/Makefile --- a/sys/modules/mlx5ib/Makefile +++ b/sys/modules/mlx5ib/Makefile @@ -6,6 +6,7 @@ mlx5_ib_ah.c \ mlx5_ib_cong.c \ mlx5_ib_cq.c \ +mlx5_ib_devx.c \ mlx5_ib_doorbell.c \ mlx5_ib_gsi.c \ mlx5_ib_mad.c \ diff --git a/sys/modules/mlxfw/Makefile b/sys/modules/mlxfw/Makefile --- a/sys/modules/mlxfw/Makefile +++ b/sys/modules/mlxfw/Makefile @@ -10,6 +10,7 @@ CFLAGS+= \ -I${SRCTOP}/sys/ofed/include \ + -I${SRCTOP}/sys/ofed/include/uapi \ -I${SRCTOP}/sys/compat/linuxkpi/common/include \ -I${SRCTOP}/sys/contrib/xz-embedded/freebsd \ -I${SRCTOP}/sys/contrib/xz-embedded/linux/lib/xz diff --git a/sys/ofed/drivers/infiniband/core/core_priv.h b/sys/ofed/drivers/infiniband/core/core_priv.h --- a/sys/ofed/drivers/infiniband/core/core_priv.h +++ b/sys/ofed/drivers/infiniband/core/core_priv.h @@ -44,6 +44,9 @@ #include +/* Total number of ports combined across all struct ib_devices's */ +#define RDMA_MAX_PORTS 8192 + #ifdef CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS int cma_configfs_init(void); void cma_configfs_exit(void); @@ -128,6 +131,8 @@ void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device); +#define ib_rdmacg_try_charge(...) ({ 0; }) + int addr_init(void); void addr_cleanup(void); @@ -142,4 +147,48 @@ const char *name); void ib_port_unregister_module_stat(struct kobject *kobj); +static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, + struct ib_pd *pd, + struct ib_qp_init_attr *attr, + struct ib_udata *udata, + struct ib_uqp_object *uobj) +{ + struct ib_qp *qp; + + if (!dev->create_qp) + return ERR_PTR(-EOPNOTSUPP); + + qp = dev->create_qp(pd, attr, udata); + if (IS_ERR(qp)) + return qp; + + qp->device = dev; + qp->pd = pd; + qp->uobject = uobj; + qp->real_qp = qp; + + qp->qp_type = attr->qp_type; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->send_cq = attr->send_cq; + qp->recv_cq = attr->recv_cq; + qp->srq = attr->srq; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->event_handler = attr->event_handler; + + atomic_set(&qp->usecnt, 0); + spin_lock_init(&qp->mr_lock); + + return qp; +} + +struct rdma_umap_priv { + struct vm_area_struct *vma; + struct list_head list; + struct rdma_user_mmap_entry *entry; +}; + +void rdma_umap_priv_init(struct rdma_umap_priv *priv, + struct vm_area_struct *vma, + struct rdma_user_mmap_entry *entry); + #endif /* _CORE_PRIV_H */ diff --git a/sys/ofed/drivers/infiniband/core/ib_agent.c b/sys/ofed/drivers/infiniband/core/ib_agent.c --- a/sys/ofed/drivers/infiniband/core/ib_agent.c +++ b/sys/ofed/drivers/infiniband/core/ib_agent.c @@ -141,13 +141,13 @@ err2: ib_free_send_mad(send_buf); err1: - ib_destroy_ah(ah); + ib_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); } static void agent_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { - ib_destroy_ah(mad_send_wc->send_buf->ah); + ib_destroy_ah(mad_send_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(mad_send_wc->send_buf); } diff --git a/sys/ofed/drivers/infiniband/core/ib_cm.c b/sys/ofed/drivers/infiniband/core/ib_cm.c --- a/sys/ofed/drivers/infiniband/core/ib_cm.c +++ b/sys/ofed/drivers/infiniband/core/ib_cm.c @@ -351,7 +351,7 @@ ret = -ENODEV; goto out; } - ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr); + ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr, 0); if 
(IS_ERR(ah)) { ret = PTR_ERR(ah); goto out; @@ -363,7 +363,7 @@ GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { - ib_destroy_ah(ah); + ib_destroy_ah(ah, 0); ret = PTR_ERR(m); goto out; } @@ -408,7 +408,7 @@ static void cm_free_msg(struct ib_mad_send_buf *msg) { if (msg->ah) - ib_destroy_ah(msg->ah); + ib_destroy_ah(msg->ah, 0); if (msg->context[0]) cm_deref_id(msg->context[0]); ib_free_send_mad(msg); diff --git a/sys/ofed/drivers/infiniband/core/ib_core_uverbs.c b/sys/ofed/drivers/infiniband/core/ib_core_uverbs.c new file mode 100644 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/ib_core_uverbs.c @@ -0,0 +1,390 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2019 Marvell. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include "uverbs.h" +#include "core_priv.h" + +/** + * rdma_umap_priv_init() - Initialize the private data of a vma + * + * @priv: The already allocated private data + * @vma: The vm area struct that needs private data + * @entry: entry into the mmap_xa that needs to be linked with + * this vma + * + * Each time we map IO memory into user space this keeps track of the + * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space + * to point to the zero page and allow the hot unplug to proceed. + * + * This is necessary for cases like PCI physical hot unplug as the actual BAR + * memory may vanish after this and access to it from userspace could MCE. + * + * RDMA drivers supporting disassociation must have their user space designed + * to cope in some way with their IO pages going to the zero page. 
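+ *
+ * Within this file, rdma_user_mmap_io() below is the expected caller:
+ * drivers normally reach this path through that helper rather than by
+ * calling rdma_umap_priv_init() directly.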
+ *
+ */
+void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+			 struct vm_area_struct *vma,
+			 struct rdma_user_mmap_entry *entry)
+{
+	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+
+	priv->vma = vma;
+	if (entry) {
+		kref_get(&entry->ref);
+		priv->entry = entry;
+	}
+	vma->vm_private_data = priv;
+	/* vm_ops is set up in ib_uverbs_mmap() to avoid module dependencies */
+
+	mutex_lock(&ufile->umap_lock);
+	list_add(&priv->list, &ufile->umaps);
+	mutex_unlock(&ufile->umap_lock);
+}
+EXPORT_SYMBOL(rdma_umap_priv_init);
+
+/**
+ * rdma_user_mmap_io() - Map IO memory into a process
+ *
+ * @ucontext: associated user context
+ * @vma: the vma related to the current mmap call
+ * @pfn: pfn to map
+ * @size: size to map
+ * @prot: pgprot to use in remap call
+ * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
+ *         if mmap_entry is not used by the driver
+ *
+ * This is to be called by drivers as part of their mmap() functions if they
+ * wish to send something like PCI-E BAR memory to userspace.
+ *
+ * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map, and 0 on
+ * success.
+ */
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+		      unsigned long pfn, unsigned long size, pgprot_t prot,
+		      struct rdma_user_mmap_entry *entry)
+{
+	struct ib_uverbs_file *ufile = ucontext->ufile;
+	struct rdma_umap_priv *priv;
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	if (vma->vm_end - vma->vm_start != size)
+		return -EINVAL;
+
+	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
+	if (WARN_ON(!vma->vm_file ||
+		    vma->vm_file->private_data != ufile))
+		return -EINVAL;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	vma->vm_page_prot = prot;
+	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
+		kfree(priv);
+		return -EAGAIN;
+	}
+
+	rdma_umap_priv_init(priv, vma, entry);
+	return 0;
+}
+EXPORT_SYMBOL(rdma_user_mmap_io);
+
+/**
+ * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
+ *
+ * @ucontext: associated user context
+ * @pgoff: The mmap offset >> PAGE_SHIFT
+ *
+ * This function is called when a user tries to mmap with an offset (returned
+ * by rdma_user_mmap_get_offset()) it initially received from the driver. The
+ * rdma_user_mmap_entry was created by the function
+ * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
+ * entry so that it won't be deleted from the xarray in the meantime.
+ *
+ * Return a reference to an entry if one exists, or NULL if there is no
+ * match. rdma_user_mmap_entry_put() must be called to put the reference.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
+			       unsigned long pgoff)
+{
+	struct rdma_user_mmap_entry *entry;
+
+	if (pgoff > U32_MAX)
+		return NULL;
+
+	xa_lock(&ucontext->mmap_xa);
+
+	entry = xa_load(&ucontext->mmap_xa, pgoff);
+
+	/*
+	 * If the refcount is zero, the entry is already being deleted;
+	 * driver_removed indicates that no further mmaps are possible and
+	 * we are waiting for the active VMAs to be closed.
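+	 * kref_get_unless_zero() is what makes this lookup safe against a
+	 * concurrent rdma_user_mmap_entry_free(): a refcount that has
+	 * already dropped to zero means the entry is being torn down and
+	 * must not be revived here.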
+	 */
+	if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
+	    !kref_get_unless_zero(&entry->ref))
+		goto err;
+
+	xa_unlock(&ucontext->mmap_xa);
+
+	return entry;
+
+err:
+	xa_unlock(&ucontext->mmap_xa);
+	return NULL;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
+
+/**
+ * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
+ *
+ * @ucontext: associated user context
+ * @vma: the vma being mmap'd into
+ *
+ * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
+ * checks that the VMA is correct.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
+			 struct vm_area_struct *vma)
+{
+	struct rdma_user_mmap_entry *entry;
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return NULL;
+	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
+	if (!entry)
+		return NULL;
+	if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
+		rdma_user_mmap_entry_put(entry);
+		return NULL;
+	}
+	return entry;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get);
+
+static void rdma_user_mmap_entry_free(struct kref *kref)
+{
+	struct rdma_user_mmap_entry *entry =
+		container_of(kref, struct rdma_user_mmap_entry, ref);
+	struct ib_ucontext *ucontext = entry->ucontext;
+	unsigned long i;
+
+	/*
+	 * Erase all xarray slots occupied by this single entry; this is
+	 * deferred until all VMAs are closed so that the mmap offsets
+	 * remain unique.
+	 */
+	xa_lock(&ucontext->mmap_xa);
+	for (i = 0; i < entry->npages; i++)
+		__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
+	xa_unlock(&ucontext->mmap_xa);
+
+	if (ucontext->device->mmap_free)
+		ucontext->device->mmap_free(entry);
+}
+
+/**
+ * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
+ *
+ * @entry: an entry in the mmap_xa
+ *
+ * This function is called when the mapping is closed if it was
+ * an io mapping or when the driver is done with the entry for
+ * some other reason.
+ * Should be called after rdma_user_mmap_entry_get was called
+ * and the entry is no longer needed. This function will erase the
+ * entry and free it if its refcnt reaches zero.
+ */
+void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
+{
+	kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_put);
+
+/**
+ * rdma_user_mmap_entry_remove() - Drop reference to entry and
+ *				   mark it as unmappable
+ *
+ * @entry: the entry to remove from the mmap_xa
+ *
+ * Drivers can call this to prevent userspace from creating more mappings for
+ * entry, however existing mmaps continue to exist and ops->mmap_free() will
+ * not be called until all user mmaps are destroyed.
+ */
+void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
+{
+	if (!entry)
+		return;
+
+	xa_lock(&entry->ucontext->mmap_xa);
+	entry->driver_removed = true;
+	xa_unlock(&entry->ucontext->mmap_xa);
+	kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
+
+/**
+ * rdma_user_mmap_entry_insert_range() - Insert an entry into the mmap_xa
+ *					 in a given range.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ * @min_pgoff: minimum pgoff to be returned
+ * @max_pgoff: maximum pgoff to be returned
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for implementing their mmap syscall. A database of mmap offsets
+ * is handled in the core and helper functions are provided to insert entries
+ * into the database and extract entries when the user calls mmap with the
+ * given offset. The function allocates a unique page offset in a given range
+ * that should be provided to the user; the user will use the offset to
+ * retrieve information such as the address to be mapped and how.
+ *
+ * Return: 0 on success and -ENOMEM on failure
+ */
+int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
+				      struct rdma_user_mmap_entry *entry,
+				      size_t length, u32 min_pgoff,
+				      u32 max_pgoff)
+{
+	struct ib_uverbs_file *ufile = ucontext->ufile;
+	u32 xa_first, xa_last, npages;
+	int err;
+	u32 i;
+	u32 j;
+
+	if (!entry)
+		return -EINVAL;
+
+	kref_init(&entry->ref);
+	entry->ucontext = ucontext;
+
+	/*
+	 * We want the whole allocation to be done without interruption from a
+	 * different thread. The allocation requires finding a free range and
+	 * storing the entry. During the xa_insert the lock could be released,
+	 * possibly allowing another thread to choose the same range.
+	 */
+	mutex_lock(&ufile->umap_lock);
+
+	xa_lock(&ucontext->mmap_xa);
+
+	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
+	entry->npages = npages;
+
+	/* Find an empty range */
+	for (i = min_pgoff, j = 0; (i + j) <= max_pgoff && j != npages; ) {
+		if (xa_load(&ucontext->mmap_xa, i + j) != NULL) {
+			if (unlikely(i + j == max_pgoff))
+				break;
+			i = i + j + 1;
+			j = 0;
+		} else {
+			if (unlikely(i + j == max_pgoff))
+				break;
+			j++;
+		}
+	}
+
+	if (j != npages)
+		goto err_unlock;
+
+	xa_first = i;
+	xa_last = i + j;
+
+	for (i = xa_first; i < xa_last; i++) {
+		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
+		if (err)
+			goto err_undo;
+	}
+
+	/*
+	 * Internally the kernel uses a page offset; in libc this is a byte
+	 * offset. Drivers should not return pgoff to userspace.
+	 */
+	entry->start_pgoff = xa_first;
+	xa_unlock(&ucontext->mmap_xa);
+	mutex_unlock(&ufile->umap_lock);
+
+	return 0;
+
+err_undo:
+	for (; i > xa_first; i--)
+		__xa_erase(&ucontext->mmap_xa, i - 1);
+
+err_unlock:
+	xa_unlock(&ucontext->mmap_xa);
+	mutex_unlock(&ufile->umap_lock);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
+
+/**
+ * rdma_user_mmap_entry_insert() - Insert an entry into the mmap_xa.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for handling user mmapped addresses. The database is handled in
+ * the core and helper functions are provided to insert entries into the
+ * database and extract entries when the user calls mmap with the given offset.
+ * The function allocates a unique page offset that should be provided to the
+ * user; the user will use the offset to retrieve information such as the
+ * address to be mapped and how.
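+ *
+ * A minimal usage sketch (editor's illustration: the "mydrv" wrapper
+ * struct and the uresp field are hypothetical; only the rdma_user_mmap_*
+ * helpers are part of this API):
+ *
+ *	struct mydrv_entry {
+ *		struct rdma_user_mmap_entry rdma_entry;
+ *	} *ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+ *
+ *	if (!ent)
+ *		return -ENOMEM;
+ *	err = rdma_user_mmap_entry_insert(ucontext, &ent->rdma_entry,
+ *					  PAGE_SIZE);
+ *	if (err)
+ *		kfree(ent);	(insert failed, nothing else to undo)
+ *	else
+ *		uresp.mmap_offset =
+ *		    rdma_user_mmap_get_offset(&ent->rdma_entry);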
+ * + * Return: 0 on success and -ENOMEM on failure + */ +int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length) +{ + return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0, + U32_MAX); +} +EXPORT_SYMBOL(rdma_user_mmap_entry_insert); diff --git a/sys/ofed/drivers/infiniband/core/ib_cq.c b/sys/ofed/drivers/infiniband/core/ib_cq.c --- a/sys/ofed/drivers/infiniband/core/ib_cq.c +++ b/sys/ofed/drivers/infiniband/core/ib_cq.c @@ -86,14 +86,17 @@ } struct ib_cq * -ib_alloc_cq(struct ib_device *dev, void *private, - int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx) +__ib_alloc_cq_user(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, + enum ib_poll_context poll_ctx, + const char *caller, struct ib_udata *udata) { struct ib_cq_init_attr cq_attr = { .cqe = nr_cqe, .comp_vector = comp_vector, }; struct ib_cq *cq; + int ret; /* * Check for invalid parameters early on to avoid @@ -108,17 +111,19 @@ return (ERR_PTR(-EINVAL)); } - cq = dev->create_cq(dev, &cq_attr, NULL, NULL); - if (IS_ERR(cq)) - return (cq); + cq = rdma_zalloc_drv_obj(dev, ib_cq); + if (!cq) + return ERR_PTR(-ENOMEM); cq->device = dev; - cq->uobject = NULL; - cq->event_handler = NULL; cq->cq_context = private; cq->poll_ctx = poll_ctx; atomic_set(&cq->usecnt, 0); + ret = dev->create_cq(cq, &cq_attr, NULL); + if (ret) + goto out_free_cq; + switch (poll_ctx) { case IB_POLL_DIRECT: cq->comp_handler = NULL; /* no hardware completions */ @@ -133,11 +138,15 @@ break; } return (cq); + +out_free_cq: + kfree(cq); + return (ERR_PTR(ret)); } -EXPORT_SYMBOL(ib_alloc_cq); +EXPORT_SYMBOL(__ib_alloc_cq_user); void -ib_free_cq(struct ib_cq *cq) +ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) { if (WARN_ON_ONCE(atomic_read(&cq->usecnt) != 0)) @@ -154,6 +163,7 @@ break; } - (void)cq->device->destroy_cq(cq); + cq->device->destroy_cq(cq, udata); + kfree(cq); } -EXPORT_SYMBOL(ib_free_cq); +EXPORT_SYMBOL(ib_free_cq_user); diff --git a/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c b/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c --- a/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c +++ b/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c @@ -86,7 +86,7 @@ { deref_rmpp_recv(rmpp_recv); wait_for_completion(&rmpp_recv->comp); - ib_destroy_ah(rmpp_recv->ah); + ib_destroy_ah(rmpp_recv->ah, RDMA_DESTROY_AH_SLEEPABLE); kfree(rmpp_recv); } @@ -176,7 +176,7 @@ hdr_len, 0, GFP_KERNEL, IB_MGMT_BASE_VERSION); if (IS_ERR(msg)) - ib_destroy_ah(ah); + ib_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); else { msg->ah = ah; msg->context[0] = ah; @@ -206,7 +206,7 @@ ret = ib_post_send_mad(msg, NULL); if (ret) { - ib_destroy_ah(msg->ah); + ib_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(msg); } } @@ -214,7 +214,7 @@ void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) { if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah) - ib_destroy_ah(mad_send_wc->send_buf->ah); + ib_destroy_ah(mad_send_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(mad_send_wc->send_buf); } @@ -242,7 +242,7 @@ ret = ib_post_send_mad(msg, NULL); if (ret) { - ib_destroy_ah(msg->ah); + ib_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(msg); } } diff --git a/sys/ofed/drivers/infiniband/core/ib_rdma_core.c b/sys/ofed/drivers/infiniband/core/ib_rdma_core.c new file mode 100644 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/ib_rdma_core.c @@ -0,0 +1,943 @@ +/* + * Copyright (c) 2016, Mellanox Technologies inc. 
All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "uverbs.h"
+#include "core_priv.h"
+#include "rdma_core.h"
+
+static void uverbs_uobject_free(struct kref *ref)
+{
+	kfree_rcu(container_of(ref, struct ib_uobject, ref), rcu);
+}
+
+/*
+ * In order to indicate we no longer need this uobject, uverbs_uobject_put
+ * is called. When the reference count is decreased, the uobject is freed.
+ * For example, this is used when attaching a completion channel to a CQ.
+ */
+void uverbs_uobject_put(struct ib_uobject *uobject)
+{
+	kref_put(&uobject->ref, uverbs_uobject_free);
+}
+EXPORT_SYMBOL(uverbs_uobject_put);
+
+static int uverbs_try_lock_object(struct ib_uobject *uobj,
+				  enum rdma_lookup_mode mode)
+{
+	/*
+	 * When a shared access is required, we use a positive counter. Each
+	 * shared access request checks that the value != -1 and increments
+	 * it. Exclusive access is required for operations like write or
+	 * destroy. In exclusive access mode, we check that the counter is
+	 * zero (nobody claimed this object) and we set it to -1. Releasing
+	 * a shared access lock is done simply by decreasing the counter. As
+	 * for exclusive access locks, since only a single one of them is
+	 * allowed concurrently, setting the counter to zero is enough for
+	 * releasing this lock.
+	 */
+	switch (mode) {
+	case UVERBS_LOOKUP_READ:
+		return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
+			-EBUSY : 0;
+	case UVERBS_LOOKUP_WRITE:
+		/* lock is exclusive */
+		return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+	case UVERBS_LOOKUP_DESTROY:
+		return 0;
+	}
+	return 0;
+}
+
+static void assert_uverbs_usecnt(struct ib_uobject *uobj,
+				 enum rdma_lookup_mode mode)
+{
+#ifdef CONFIG_LOCKDEP
+	switch (mode) {
+	case UVERBS_LOOKUP_READ:
+		WARN_ON(atomic_read(&uobj->usecnt) <= 0);
+		break;
+	case UVERBS_LOOKUP_WRITE:
+		WARN_ON(atomic_read(&uobj->usecnt) != -1);
+		break;
+	case UVERBS_LOOKUP_DESTROY:
+		break;
+	}
+#endif
+}
+
+/*
+ * This must be called with the hw_destroy_rwsem locked for read or write,
+ * also the uobject itself must be locked for write.
+ *
+ * Upon return the HW object is guaranteed to be destroyed.
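+ *
+ * Editorial summary of the modes handled below (not part of the original
+ * comment):
+ *	RDMA_REMOVE_ABORT	allocation was never committed; run
+ *				alloc_abort and drop the stack kref
+ *	RDMA_REMOVE_DESTROY	remove the handle; the caller keeps its
+ *				kref and must unlock and put the uobject
+ *	all other reasons	destroy the HW object, unlock, and drop
+ *				the uobjects-list kref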
+ *
+ * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held;
+ * however, the type's alloc_commit function cannot have been called and the
+ * uobject cannot be on the uobjects_lists.
+ *
+ * For RDMA_REMOVE_DESTROY the caller should be holding a kref (e.g. via
+ * rdma_lookup_get_uobject) and the object is left in a state where the
+ * caller needs to call rdma_lookup_put_uobject.
+ *
+ * For all other destroy modes this function internally unlocks the uobject
+ * and consumes the kref on the uobj.
+ */
+static int uverbs_destroy_uobject(struct ib_uobject *uobj,
+				  enum rdma_remove_reason reason,
+				  struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uverbs_file *ufile = attrs->ufile;
+	unsigned long flags;
+	int ret;
+
+	lockdep_assert_held(&ufile->hw_destroy_rwsem);
+	assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
+
+	if (reason == RDMA_REMOVE_ABORT) {
+		WARN_ON(!list_empty(&uobj->list));
+		WARN_ON(!uobj->context);
+		uobj->uapi_object->type_class->alloc_abort(uobj);
+	} else if (uobj->object) {
+		ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
+								attrs);
+		if (ret) {
+			if (ib_is_destroy_retryable(ret, reason, uobj))
+				return ret;
+
+			/* Nothing to be done, dangle the memory and move on */
+			WARN(true,
+			     "ib_uverbs: failed to remove uobject id %d, driver err=%d",
+			     uobj->id, ret);
+		}
+
+		uobj->object = NULL;
+	}
+
+	uobj->context = NULL;
+
+	/*
+	 * For DESTROY the usecnt is held write locked, the caller is
+	 * expected to unlock and put the object when done with it. Only
+	 * DESTROY can remove the IDR handle.
+	 */
+	if (reason != RDMA_REMOVE_DESTROY)
+		atomic_set(&uobj->usecnt, 0);
+	else
+		uobj->uapi_object->type_class->remove_handle(uobj);
+
+	if (!list_empty(&uobj->list)) {
+		spin_lock_irqsave(&ufile->uobjects_lock, flags);
+		list_del_init(&uobj->list);
+		spin_unlock_irqrestore(&ufile->uobjects_lock, flags);
+
+		/*
+		 * Pairs with the get in rdma_alloc_commit_uobject(), could
+		 * destroy uobj.
+		 */
+		uverbs_uobject_put(uobj);
+	}
+
+	/*
+	 * When aborting the stack kref remains owned by the core code, and is
+	 * not transferred into the type. Pairs with the get in alloc_uobj
+	 */
+	if (reason == RDMA_REMOVE_ABORT)
+		uverbs_uobject_put(uobj);
+
+	return 0;
+}
+
+/*
+ * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
+ * sequence. It should only be used from command callbacks. On success the
+ * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
+ * version requires the caller to have already obtained an
+ * LOOKUP_DESTROY uobject kref.
+ */
+int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uverbs_file *ufile = attrs->ufile;
+	int ret;
+
+	down_read(&ufile->hw_destroy_rwsem);
+
+	ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
+	if (ret)
+		goto out_unlock;
+
+	ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY, attrs);
+	if (ret) {
+		atomic_set(&uobj->usecnt, 0);
+		goto out_unlock;
+	}
+
+out_unlock:
+	up_read(&ufile->hw_destroy_rwsem);
+	return ret;
+}
+
+/*
+ * uobj_get_destroy destroys the HW object and returns a handle to the uobj
+ * with a NULL object pointer. The caller must pair this with
+ * uobj_put_destroy.
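+ *
+ * Editorial usage sketch (illustrative only; the CQ object and response
+ * field are assumptions): a destroy-with-response handler does roughly
+ *
+ *	uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
+ *	if (IS_ERR(uobj))
+ *		return PTR_ERR(uobj);
+ *	resp.events_reported = ...;	/* read from the uobject wrapper */
+ *	uobj_put_destroy(uobj);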
+ */
+struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
+				      u32 id, struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj;
+	int ret;
+
+	uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id,
+				       UVERBS_LOOKUP_DESTROY, attrs);
+	if (IS_ERR(uobj))
+		return uobj;
+
+	ret = uobj_destroy(uobj, attrs);
+	if (ret) {
+		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
+		return ERR_PTR(ret);
+	}
+
+	return uobj;
+}
+
+/*
+ * Does both uobj_get_destroy() and uobj_put_destroy(). Returns 0 on success
+ * (negative errno on failure). For use by callers that do not need the uobj.
+ */
+int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
+			   struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj;
+
+	uobj = __uobj_get_destroy(obj, id, attrs);
+	if (IS_ERR(uobj))
+		return PTR_ERR(uobj);
+
+	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+	return 0;
+}
+
+/* alloc_uobj must be undone by uverbs_destroy_uobject() */
+static struct ib_uobject *alloc_uobj(struct uverbs_attr_bundle *attrs,
+				     const struct uverbs_api_object *obj)
+{
+	struct ib_uverbs_file *ufile = attrs->ufile;
+	struct ib_uobject *uobj;
+
+	if (!attrs->context) {
+		struct ib_ucontext *ucontext =
+			ib_uverbs_get_ucontext_file(ufile);
+
+		if (IS_ERR(ucontext))
+			return ERR_CAST(ucontext);
+		attrs->context = ucontext;
+	}
+
+	uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
+	if (!uobj)
+		return ERR_PTR(-ENOMEM);
+	/*
+	 * user_handle should be filled by the handler; the object is added
+	 * to the list in the commit stage.
+	 */
+	uobj->ufile = ufile;
+	uobj->context = attrs->context;
+	INIT_LIST_HEAD(&uobj->list);
+	uobj->uapi_object = obj;
+	/*
+	 * Allocated objects start out as write locked to deny any other
+	 * syscalls from accessing them until they are committed. See
+	 * rdma_alloc_commit_uobject
+	 */
+	atomic_set(&uobj->usecnt, -1);
+	kref_init(&uobj->ref);
+
+	return uobj;
+}
+
+#define NULL_IB_UOBJECT ((struct ib_uobject *)1)
+
+static int idr_add_uobj(struct ib_uobject *uobj)
+{
+	/*
+	 * We start with allocating an idr pointing to NULL. This represents an
+	 * object which isn't initialized yet. We'll replace it later on with
+	 * the real object once we commit.
+	 */
+	return xa_alloc(&uobj->ufile->idr, &uobj->id, NULL_IB_UOBJECT,
+			xa_limit_32b, GFP_KERNEL);
+}
+
+/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
+static struct ib_uobject *
+lookup_get_idr_uobject(const struct uverbs_api_object *obj,
+		       struct ib_uverbs_file *ufile, s64 id,
+		       enum rdma_lookup_mode mode)
+{
+	struct ib_uobject *uobj;
+
+	if (id < 0 || id > ULONG_MAX)
+		return ERR_PTR(-EINVAL);
+
+	rcu_read_lock();
+	/*
+	 * The idr_find is guaranteed to return a pointer to something that
+	 * isn't freed yet, or NULL, as the free after idr_remove goes through
+	 * kfree_rcu(). However the object may still have been released and
+	 * kfree() could be called at any time.
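+	 *
+	 * Editorial note (not in the original comment): this is the usual
+	 * RCU + kref lookup pattern; the kref_get_unless_zero() below is
+	 * what makes racing against the final kref_put() safe, since a
+	 * zero refcount means the object is already headed for kfree_rcu().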
+	 */
+	uobj = xa_load(&ufile->idr, id);
+	if (!uobj || uobj == NULL_IB_UOBJECT || !kref_get_unless_zero(&uobj->ref))
+		uobj = ERR_PTR(-ENOENT);
+	rcu_read_unlock();
+	return uobj;
+}
+
+static struct ib_uobject *
+lookup_get_fd_uobject(const struct uverbs_api_object *obj,
+		      struct ib_uverbs_file *ufile, s64 id,
+		      enum rdma_lookup_mode mode)
+{
+	const struct uverbs_obj_fd_type *fd_type;
+	struct file *f;
+	struct ib_uobject *uobject;
+	int fdno = id;
+
+	if (fdno != id)
+		return ERR_PTR(-EINVAL);
+
+	if (mode != UVERBS_LOOKUP_READ)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (!obj->type_attrs)
+		return ERR_PTR(-EIO);
+	fd_type =
+		container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
+
+	f = fget(fdno);
+	if (!f)
+		return ERR_PTR(-EBADF);
+
+	uobject = f->private_data;
+	/*
+	 * fget(id) ensures we are not currently running
+	 * uverbs_uobject_fd_release(), and the caller is expected to ensure
+	 * that release is never done while a call to lookup is possible.
+	 */
+	if (f->f_op != fd_type->fops) {
+		fput(f);
+		return ERR_PTR(-EBADF);
+	}
+
+	uverbs_uobject_get(uobject);
+	return uobject;
+}
+
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
+					   struct ib_uverbs_file *ufile, s64 id,
+					   enum rdma_lookup_mode mode,
+					   struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj;
+	int ret;
+
+	if (obj == ERR_PTR(-ENOMSG)) {
+		/* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */
+		uobj = lookup_get_idr_uobject(NULL, ufile, id, mode);
+		if (IS_ERR(uobj))
+			return uobj;
+	} else {
+		if (IS_ERR(obj))
+			return ERR_PTR(-EINVAL);
+
+		uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
+		if (IS_ERR(uobj))
+			return uobj;
+
+		if (uobj->uapi_object != obj) {
+			ret = -EINVAL;
+			goto free;
+		}
+	}
+
+	/*
+	 * If we have been disassociated, block every command except for
+	 * DESTROY-based commands.
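+	 *
+	 * Editorial note (not in the original comment): the -EIO below
+	 * fails READ and WRITE lookups once the device is gone, while
+	 * DESTROY lookups still succeed so userspace can keep tearing
+	 * down its handles.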
+ */ + if (mode != UVERBS_LOOKUP_DESTROY && + !srcu_dereference(ufile->device->ib_dev, + &ufile->device->disassociate_srcu)) { + ret = -EIO; + goto free; + } + + ret = uverbs_try_lock_object(uobj, mode); + if (ret) + goto free; + if (attrs) + attrs->context = uobj->context; + + return uobj; +free: + uobj->uapi_object->type_class->lookup_put(uobj, mode); + uverbs_uobject_put(uobj); + return ERR_PTR(ret); +} + +static struct ib_uobject * +alloc_begin_idr_uobject(const struct uverbs_api_object *obj, + struct uverbs_attr_bundle *attrs) +{ + int ret; + struct ib_uobject *uobj; + + uobj = alloc_uobj(attrs, obj); + if (IS_ERR(uobj)) + return uobj; + + ret = idr_add_uobj(uobj); + if (ret) + goto uobj_put; + + ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device, + RDMACG_RESOURCE_HCA_OBJECT); + if (ret) + goto remove; + + return uobj; + +remove: + xa_erase(&attrs->ufile->idr, uobj->id); +uobj_put: + uverbs_uobject_put(uobj); + return ERR_PTR(ret); +} + +static struct ib_uobject * +alloc_begin_fd_uobject(const struct uverbs_api_object *obj, + struct uverbs_attr_bundle *attrs) +{ + const struct uverbs_obj_fd_type *fd_type = + container_of(obj->type_attrs, struct uverbs_obj_fd_type, type); + int new_fd; + struct ib_uobject *uobj; + struct file *filp; + + if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release)) + return ERR_PTR(-EINVAL); + + new_fd = get_unused_fd_flags(O_CLOEXEC); + if (new_fd < 0) + return ERR_PTR(new_fd); + + uobj = alloc_uobj(attrs, obj); + if (IS_ERR(uobj)) + goto err_fd; + + /* Note that uverbs_uobject_fd_release() is called during abort */ + filp = alloc_file(fd_type->flags, fd_type->fops); + if (IS_ERR(filp)) { + uobj = ERR_CAST(filp); + goto err_uobj; + } + uobj->object = filp; + + uobj->id = new_fd; + return uobj; + +err_uobj: + uverbs_uobject_put(uobj); +err_fd: + put_unused_fd(new_fd); + return uobj; +} + +struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, + struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_file *ufile = attrs->ufile; + struct ib_uobject *ret; + + if (IS_ERR(obj)) + return ERR_PTR(-EINVAL); + + /* + * The hw_destroy_rwsem is held across the entire object creation and + * released during rdma_alloc_commit_uobject or + * rdma_alloc_abort_uobject + */ + if (!down_read_trylock(&ufile->hw_destroy_rwsem)) + return ERR_PTR(-EIO); + + ret = obj->type_class->alloc_begin(obj, attrs); + if (IS_ERR(ret)) { + up_read(&ufile->hw_destroy_rwsem); + return ret; + } + return ret; +} + +static void alloc_abort_idr_uobject(struct ib_uobject *uobj) +{ + xa_erase(&uobj->ufile->idr, uobj->id); +} + +static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + const struct uverbs_obj_idr_type *idr_type = + container_of(uobj->uapi_object->type_attrs, + struct uverbs_obj_idr_type, type); + int ret = idr_type->destroy_object(uobj, why, attrs); + + /* + * We can only fail gracefully if the user requested to destroy the + * object or when a retry may be called upon an error. + * In the rest of the cases, just remove whatever you can. 
+	 */
+	if (ib_is_destroy_retryable(ret, why, uobj))
+		return ret;
+
+	if (why == RDMA_REMOVE_ABORT)
+		return 0;
+
+	return 0;
+}
+
+static void remove_handle_idr_uobject(struct ib_uobject *uobj)
+{
+	xa_erase(&uobj->ufile->idr, uobj->id);
+	/* Matches the kref in alloc_commit_idr_uobject */
+	uverbs_uobject_put(uobj);
+}
+
+static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
+{
+	struct file *filp = uobj->object;
+
+	fput(filp);
+	put_unused_fd(uobj->id);
+}
+
+static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
+					      enum rdma_remove_reason why,
+					      struct uverbs_attr_bundle *attrs)
+{
+	const struct uverbs_obj_fd_type *fd_type = container_of(
+		uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
+	int ret = fd_type->destroy_object(uobj, why);
+
+	if (ib_is_destroy_retryable(ret, why, uobj))
+		return ret;
+
+	return 0;
+}
+
+static void remove_handle_fd_uobject(struct ib_uobject *uobj)
+{
+}
+
+static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
+{
+	struct ib_uverbs_file *ufile = uobj->ufile;
+	void *old;
+
+	/*
+	 * We already allocated this IDR with a NULL object, so
+	 * this shouldn't fail.
+	 *
+	 * NOTE: Storing the uobj transfers our kref on uobj to the XArray.
+	 * It will be put by remove_commit_idr_uobject()
+	 */
+	old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL);
+	WARN_ON(old != NULL_IB_UOBJECT);
+}
+
+static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
+{
+	int fd = uobj->id;
+	struct file *filp = uobj->object;
+
+	/* Matching put will be done in uverbs_uobject_fd_release() */
+	kref_get(&uobj->ufile->ref);
+
+	/* This shouldn't be used anymore. Use the file object instead */
+	uobj->id = 0;
+
+	/*
+	 * NOTE: Once we install the file we lose ownership of our kref on
+	 * uobj. It will be put by uverbs_uobject_fd_release()
+	 */
+	filp->private_data = uobj;
+	fd_install(fd, filp);
+}
+
+/*
+ * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the
+ * caller can no longer assume uobj is valid. If this function fails it
+ * destroys the uobject, including the attached HW object.
+ */
+void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
+			       struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uverbs_file *ufile = attrs->ufile;
+
+	/* alloc_commit consumes the uobj kref */
+	uobj->uapi_object->type_class->alloc_commit(uobj);
+
+	/* kref is held so long as the uobj is on the uobj list. */
+	uverbs_uobject_get(uobj);
+	spin_lock_irq(&ufile->uobjects_lock);
+	list_add(&uobj->list, &ufile->uobjects);
+	spin_unlock_irq(&ufile->uobjects_lock);
+
+	/* matches atomic_set(-1) in alloc_uobj */
+	atomic_set(&uobj->usecnt, 0);
+
+	/* Matches the down_read in rdma_alloc_begin_uobject */
+	up_read(&ufile->hw_destroy_rwsem);
+}
+
+/*
+ * This consumes the kref for uobj. It is up to the caller to unwind the HW
+ * object and anything else connected to uobj before calling this.
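+ *
+ * Editorial lifecycle sketch (illustrative; UVERBS_OBJECT_FOO and
+ * driver_create() are placeholders): a create handler pairs
+ * begin/commit/abort like so:
+ *
+ *	uobj = uobj_alloc(UVERBS_OBJECT_FOO, attrs, &ib_dev);
+ *	hw = driver_create(...);
+ *	if (IS_ERR(hw)) {
+ *		uobj_alloc_abort(uobj, attrs);	/* consumes uobj */
+ *		return PTR_ERR(hw);
+ *	}
+ *	uobj->object = hw;
+ *	rdma_alloc_commit_uobject(uobj, attrs);	/* also consumes uobj */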
+ */
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
+			      struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uverbs_file *ufile = uobj->ufile;
+
+	uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
+
+	/* Matches the down_read in rdma_alloc_begin_uobject */
+	up_read(&ufile->hw_destroy_rwsem);
+}
+
+static void lookup_put_idr_uobject(struct ib_uobject *uobj,
+				   enum rdma_lookup_mode mode)
+{
+}
+
+static void lookup_put_fd_uobject(struct ib_uobject *uobj,
+				  enum rdma_lookup_mode mode)
+{
+	struct file *filp = uobj->object;
+
+	WARN_ON(mode != UVERBS_LOOKUP_READ);
+	/*
+	 * This indirectly calls uverbs_uobject_fd_release() and frees the
+	 * object
+	 */
+	fput(filp);
+}
+
+void rdma_lookup_put_uobject(struct ib_uobject *uobj,
+			     enum rdma_lookup_mode mode)
+{
+	assert_uverbs_usecnt(uobj, mode);
+	uobj->uapi_object->type_class->lookup_put(uobj, mode);
+	/*
+	 * In order to unlock an object, either decrease its usecnt for
+	 * read access or zero it in case of exclusive access. See
+	 * uverbs_try_lock_object for locking schema information.
+	 */
+	switch (mode) {
+	case UVERBS_LOOKUP_READ:
+		atomic_dec(&uobj->usecnt);
+		break;
+	case UVERBS_LOOKUP_WRITE:
+		atomic_set(&uobj->usecnt, 0);
+		break;
+	case UVERBS_LOOKUP_DESTROY:
+		break;
+	}
+
+	/* Pairs with the kref obtained by type->lookup_get */
+	uverbs_uobject_put(uobj);
+}
+
+void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
+{
+	xa_init_flags(&ufile->idr, XA_FLAGS_ALLOC);
+}
+
+void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
+{
+	struct ib_uobject *entry;
+	unsigned long id;
+
+	/*
+	 * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
+	 * there are no HW objects left, however the xarray is still populated
+	 * with anything that has not been cleaned up by userspace. Since the
+	 * kref on ufile is 0, nothing is allowed to call lookup_get.
+	 *
+	 * This is an optimized equivalent to remove_handle_idr_uobject
+	 */
+	xa_for_each(&ufile->idr, id, entry) {
+		WARN_ON(entry->object);
+		uverbs_uobject_put(entry);
+	}
+
+	xa_destroy(&ufile->idr);
+}
+
+const struct uverbs_obj_type_class uverbs_idr_class = {
+	.alloc_begin = alloc_begin_idr_uobject,
+	.lookup_get = lookup_get_idr_uobject,
+	.alloc_commit = alloc_commit_idr_uobject,
+	.alloc_abort = alloc_abort_idr_uobject,
+	.lookup_put = lookup_put_idr_uobject,
+	.destroy_hw = destroy_hw_idr_uobject,
+	.remove_handle = remove_handle_idr_uobject,
+};
+EXPORT_SYMBOL(uverbs_idr_class);
+
+/*
+ * Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
+ * file_operations release method.
+ */
+int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
+{
+	struct ib_uverbs_file *ufile;
+	struct ib_uobject *uobj;
+
+	/*
+	 * This can only happen if the fput came from alloc_abort_fd_uobject()
+	 */
+	if (!filp->private_data)
+		return 0;
+	uobj = filp->private_data;
+	ufile = uobj->ufile;
+
+	if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
+		struct uverbs_attr_bundle attrs = {
+			.context = uobj->context,
+			.ufile = ufile,
+		};
+
+		/*
+		 * lookup_get_fd_uobject holds the kref on the struct file any
+		 * time a FD uobj is locked, which prevents this release
+		 * method from being invoked. Meaning we can always get the
+		 * write lock here, or we have a kernel bug.
+		 */
+		WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
+		uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE, &attrs);
+		up_read(&ufile->hw_destroy_rwsem);
+	}
+
+	/* Matches the get in alloc_commit_fd_uobject() */
+	kref_put(&ufile->ref, ib_uverbs_release_file);
+
+	/* Pairs with filp->private_data in alloc_begin_fd_uobject */
+	uverbs_uobject_put(uobj);
+	return 0;
+}
+EXPORT_SYMBOL(uverbs_uobject_fd_release);
+
+/*
+ * Drop the ucontext off the ufile and completely disconnect it from the
+ * ib_device
+ */
+static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
+				   enum rdma_remove_reason reason)
+{
+	struct ib_ucontext *ucontext = ufile->ucontext;
+	struct ib_device *ib_dev = ucontext->device;
+
+	/*
+	 * If we are closing the FD then the user mmap VMAs must have
+	 * already been destroyed as they hold on to the filep, otherwise
+	 * they need to be zap'd.
+	 */
+	if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
+		uverbs_user_mmap_disassociate(ufile);
+		if (ib_dev->disassociate_ucontext)
+			ib_dev->disassociate_ucontext(ucontext);
+	}
+
+	ib_dev->dealloc_ucontext(ucontext);
+	WARN_ON(!xa_empty(&ucontext->mmap_xa));
+	kfree(ucontext);
+
+	ufile->ucontext = NULL;
+}
+
+static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
+				  enum rdma_remove_reason reason)
+{
+	struct ib_uobject *obj, *next_obj;
+	int ret = -EINVAL;
+	struct uverbs_attr_bundle attrs = { .ufile = ufile };
+
+	/*
+	 * This shouldn't run while executing other commands on this
+	 * context. Thus, the only thing we should take care of is
+	 * releasing a FD while traversing this list. The FD could be
+	 * closed and released from the _release fop of this FD.
+	 * In order to mitigate this, we add a lock.
+	 * We take and release the lock per traversal in order to give
+	 * other threads (which might still use the FDs) a chance to run.
+	 */
+	list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
+		attrs.context = obj->context;
+		/*
+		 * if we hit this WARN_ON, that means we are
+		 * racing with a lookup_get.
+		 */
+		WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
+		if (!uverbs_destroy_uobject(obj, reason, &attrs))
+			ret = 0;
+		else
+			atomic_set(&obj->usecnt, 0);
+	}
+	return ret;
+}
+
+/*
+ * Destroy the ucontext and every uobject associated with it.
+ *
+ * This is internally locked and can be called in parallel from multiple
+ * contexts.
+ */
+void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
+			     enum rdma_remove_reason reason)
+{
+	down_write(&ufile->hw_destroy_rwsem);
+
+	/*
+	 * If a ucontext was never created then we can't have any uobjects to
+	 * cleanup, nothing to do.
+ */ + if (!ufile->ucontext) + goto done; + + ufile->ucontext->closing = true; + ufile->ucontext->cleanup_retryable = true; + while (!list_empty(&ufile->uobjects)) + if (__uverbs_cleanup_ufile(ufile, reason)) { + /* + * No entry was cleaned-up successfully during this + * iteration + */ + break; + } + + ufile->ucontext->cleanup_retryable = false; + if (!list_empty(&ufile->uobjects)) + __uverbs_cleanup_ufile(ufile, reason); + + ufile_destroy_ucontext(ufile, reason); + +done: + up_write(&ufile->hw_destroy_rwsem); +} + +const struct uverbs_obj_type_class uverbs_fd_class = { + .alloc_begin = alloc_begin_fd_uobject, + .lookup_get = lookup_get_fd_uobject, + .alloc_commit = alloc_commit_fd_uobject, + .alloc_abort = alloc_abort_fd_uobject, + .lookup_put = lookup_put_fd_uobject, + .destroy_hw = destroy_hw_fd_uobject, + .remove_handle = remove_handle_fd_uobject, +}; +EXPORT_SYMBOL(uverbs_fd_class); + +struct ib_uobject * +uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, + s64 id, struct uverbs_attr_bundle *attrs) +{ + const struct uverbs_api_object *obj = + uapi_get_object(attrs->ufile->device->uapi, object_id); + + switch (access) { + case UVERBS_ACCESS_READ: + return rdma_lookup_get_uobject(obj, attrs->ufile, id, + UVERBS_LOOKUP_READ, attrs); + case UVERBS_ACCESS_DESTROY: + /* Actual destruction is done inside uverbs_handle_method */ + return rdma_lookup_get_uobject(obj, attrs->ufile, id, + UVERBS_LOOKUP_DESTROY, attrs); + case UVERBS_ACCESS_WRITE: + return rdma_lookup_get_uobject(obj, attrs->ufile, id, + UVERBS_LOOKUP_WRITE, attrs); + case UVERBS_ACCESS_NEW: + return rdma_alloc_begin_uobject(obj, attrs); + default: + WARN_ON(true); + return ERR_PTR(-EOPNOTSUPP); + } +} + +void uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, bool commit, + struct uverbs_attr_bundle *attrs) +{ + /* + * refcounts should be handled at the object level and not at the + * uobject level. Refcounts of the objects themselves are done in + * handlers. 
+ */ + + switch (access) { + case UVERBS_ACCESS_READ: + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ); + break; + case UVERBS_ACCESS_WRITE: + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); + break; + case UVERBS_ACCESS_DESTROY: + if (uobj) + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY); + break; + case UVERBS_ACCESS_NEW: + if (commit) + rdma_alloc_commit_uobject(uobj, attrs); + else + rdma_alloc_abort_uobject(uobj, attrs); + break; + default: + WARN_ON(true); + } +} diff --git a/sys/ofed/drivers/infiniband/core/ib_sa_query.c b/sys/ofed/drivers/infiniband/core/ib_sa_query.c --- a/sys/ofed/drivers/infiniband/core/ib_sa_query.c +++ b/sys/ofed/drivers/infiniband/core/ib_sa_query.c @@ -495,7 +495,7 @@ { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); - ib_destroy_ah(sm_ah->ah); + ib_destroy_ah(sm_ah->ah, 0); kfree(sm_ah); } @@ -535,7 +535,7 @@ ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID); } - new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); + new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE); if (IS_ERR(new_ah->ah)) { pr_warn("Couldn't create new SM AH\n"); kfree(new_ah); diff --git a/sys/ofed/drivers/infiniband/core/ib_user_mad.c b/sys/ofed/drivers/infiniband/core/ib_user_mad.c --- a/sys/ofed/drivers/infiniband/core/ib_user_mad.c +++ b/sys/ofed/drivers/infiniband/core/ib_user_mad.c @@ -201,7 +201,7 @@ struct ib_umad_packet *packet = send_wc->send_buf->context[0]; dequeue_send(file, packet); - ib_destroy_ah(packet->msg->ah); + ib_destroy_ah(packet->msg->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { @@ -509,7 +509,7 @@ ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } - ah = ib_create_ah(agent->qp->pd, &ah_attr); + ah = ib_create_user_ah(agent->qp->pd, &ah_attr, NULL); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_up; @@ -603,7 +603,7 @@ err_msg: ib_free_send_mad(packet->msg); err_ah: - ib_destroy_ah(ah); + ib_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); err_up: mutex_unlock(&file->mutex); err: diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c --- a/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c +++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c @@ -40,392 +40,291 @@ #define LINUXKPI_PARAM_PREFIX ibcore_ +#include + #include #include #include #include -#include -#include +#include + +#include +#include +#include "rdma_core.h" #include "uverbs.h" #include "core_priv.h" -#include - -struct uverbs_lock_class { - char name[16]; -}; - -static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" }; -static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" }; -static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" }; -static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" }; -static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; -static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; -static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; -static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; -static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; -static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" }; -static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" }; - /* - * The ib_uobject locking scheme is as follows: - * - * - ib_uverbs_idr_lock protects the uverbs idrs 
themselves, so it - * needs to be held during all idr write operations. When an object is - * looked up, a reference must be taken on the object's kref before - * dropping this lock. For read operations, the rcu_read_lock() - * and rcu_write_lock() but similarly the kref reference is grabbed - * before the rcu_read_unlock(). + * Copy a response to userspace. If the provided 'resp' is larger than the + * user buffer it is silently truncated. If the user provided a larger buffer + * then the trailing portion is zero filled. * - * - Each object also has an rwsem. This rwsem must be held for - * reading while an operation that uses the object is performed. - * For example, while registering an MR, the associated PD's - * uobject.mutex must be held for reading. The rwsem must be held - * for writing while initializing or destroying an object. - * - * - In addition, each object has a "live" flag. If this flag is not - * set, then lookups of the object will fail even if it is found in - * the idr. This handles a reader that blocks and does not acquire - * the rwsem until after the object is destroyed. The destroy - * operation will set the live flag to 0 and then drop the rwsem; - * this will allow the reader to acquire the rwsem, see that the - * live flag is 0, and then drop the rwsem and its reference to - * object. The underlying storage will not be freed until the last - * reference to the object is dropped. + * These semantics are intended to support future extension of the output + * structures. */ - -static void init_uobj(struct ib_uobject *uobj, u64 user_handle, - struct ib_ucontext *context, struct uverbs_lock_class *c) -{ - uobj->user_handle = user_handle; - uobj->context = context; - kref_init(&uobj->ref); - init_rwsem(&uobj->mutex); - uobj->live = 0; -} - -static void release_uobj(struct kref *kref) -{ - kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu); -} - -static void put_uobj(struct ib_uobject *uobj) -{ - kref_put(&uobj->ref, release_uobj); -} - -static void put_uobj_read(struct ib_uobject *uobj) -{ - up_read(&uobj->mutex); - put_uobj(uobj); -} - -static void put_uobj_write(struct ib_uobject *uobj) -{ - up_write(&uobj->mutex); - put_uobj(uobj); -} - -static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) +static int uverbs_response(struct uverbs_attr_bundle *attrs, const void *resp, + size_t resp_len) { int ret; - idr_preload(GFP_KERNEL); - spin_lock(&ib_uverbs_idr_lock); + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CORE_OUT)) + return uverbs_copy_to_struct_or_zero( + attrs, UVERBS_ATTR_CORE_OUT, resp, resp_len); - ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT); - if (ret >= 0) - uobj->id = ret; - - spin_unlock(&ib_uverbs_idr_lock); - idr_preload_end(); - - return ret < 0 ? 
ret : 0; -} - -void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) -{ - spin_lock(&ib_uverbs_idr_lock); - idr_remove(idr, uobj->id); - spin_unlock(&ib_uverbs_idr_lock); -} - -static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, - struct ib_ucontext *context) -{ - struct ib_uobject *uobj; - - rcu_read_lock(); - uobj = idr_find(idr, id); - if (uobj) { - if (uobj->context == context) - kref_get(&uobj->ref); - else - uobj = NULL; - } - rcu_read_unlock(); - - return uobj; -} - -static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, - struct ib_ucontext *context, int nested) -{ - struct ib_uobject *uobj; - - uobj = __idr_get_uobj(idr, id, context); - if (!uobj) - return NULL; + if (copy_to_user(attrs->ucore.outbuf, resp, + min(attrs->ucore.outlen, resp_len))) + return -EFAULT; - if (nested) - down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING); - else - down_read(&uobj->mutex); - if (!uobj->live) { - put_uobj_read(uobj); - return NULL; + if (resp_len < attrs->ucore.outlen) { + /* + * Zero fill any extra memory that user + * space might have provided. + */ + ret = clear_user(attrs->ucore.outbuf + resp_len, + attrs->ucore.outlen - resp_len); + if (ret) + return -EFAULT; } - return uobj; + return 0; } -static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, - struct ib_ucontext *context) +/* + * Copy a request from userspace. If the provided 'req' is larger than the + * user buffer then the user buffer is zero extended into the 'req'. If 'req' + * is smaller than the user buffer then the uncopied bytes in the user buffer + * must be zero. + */ +static int uverbs_request(struct uverbs_attr_bundle *attrs, void *req, + size_t req_len) { - struct ib_uobject *uobj; - - uobj = __idr_get_uobj(idr, id, context); - if (!uobj) - return NULL; + if (copy_from_user(req, attrs->ucore.inbuf, + min(attrs->ucore.inlen, req_len))) + return -EFAULT; - down_write(&uobj->mutex); - if (!uobj->live) { - put_uobj_write(uobj); - return NULL; + if (attrs->ucore.inlen < req_len) { + memset((u8 *)req + attrs->ucore.inlen, 0, + req_len - attrs->ucore.inlen); + } else if (attrs->ucore.inlen > req_len) { + if (!ib_is_buffer_cleared(attrs->ucore.inbuf + req_len, + attrs->ucore.inlen - req_len)) + return -EOPNOTSUPP; } - - return uobj; + return 0; } -static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context, - int nested) +/* + * Generate the value for the 'response_length' protocol used by write_ex. + * This is the number of bytes the kernel actually wrote. Userspace can use + * this to detect what structure members in the response the kernel + * understood. + */ +static u32 uverbs_response_length(struct uverbs_attr_bundle *attrs, + size_t resp_len) { - struct ib_uobject *uobj; - - uobj = idr_read_uobj(idr, id, context, nested); - return uobj ? uobj->object : NULL; + return min_t(size_t, attrs->ucore.outlen, resp_len); } -static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0); -} +/* + * The iterator version of the request interface is for handlers that need to + * step over a flex array at the end of a command header. 
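+ *
+ * Editorial usage sketch (hypothetical handler; the cmd/sge names are
+ * illustrative):
+ *
+ *	ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ *	if (ret)
+ *		return ret;
+ *	for (i = 0; i < cmd.num_sge; i++) {
+ *		ret = uverbs_request_next(&iter, &sge, sizeof(sge));
+ *		if (ret)
+ *			return ret;
+ *	}
+ *	ret = uverbs_request_finish(&iter);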
+ */ +struct uverbs_req_iter { + const u8 __user *cur; + const u8 __user *end; +}; -static void put_pd_read(struct ib_pd *pd) +static int uverbs_request_start(struct uverbs_attr_bundle *attrs, + struct uverbs_req_iter *iter, + void *req, + size_t req_len) { - put_uobj_read(pd->uobject); -} + if (attrs->ucore.inlen < req_len) + return -ENOSPC; -static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested) -{ - return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested); -} + if (copy_from_user(req, attrs->ucore.inbuf, req_len)) + return -EFAULT; -static void put_cq_read(struct ib_cq *cq) -{ - put_uobj_read(cq->uobject); + iter->cur = attrs->ucore.inbuf + req_len; + iter->end = attrs->ucore.inbuf + attrs->ucore.inlen; + return 0; } -static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) +static int uverbs_request_next(struct uverbs_req_iter *iter, void *val, + size_t len) { - return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0); -} + if (iter->cur + len > iter->end) + return -ENOSPC; -static void put_ah_read(struct ib_ah *ah) -{ - put_uobj_read(ah->uobject); -} + if (copy_from_user(val, iter->cur, len)) + return -EFAULT; -static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); + iter->cur += len; + return 0; } -static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) +static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter, + size_t len) { - return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0); -} + const void __user *res = iter->cur; -static void put_wq_read(struct ib_wq *wq) -{ - put_uobj_read(wq->uobject); + if (iter->cur + len > iter->end) + return (void __force __user *)ERR_PTR(-ENOSPC); + iter->cur += len; + return res; } -static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, - struct ib_ucontext *context) +static int uverbs_request_finish(struct uverbs_req_iter *iter) { - return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0); + if (!ib_is_buffer_cleared(iter->cur, iter->end - iter->cur)) + return -EOPNOTSUPP; + return 0; } -static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table) +/* + * When calling a destroy function during an error unwind we need to pass in + * the udata that is sanitized of all user arguments. Ie from the driver + * perspective it looks like no udata was passed. + */ +struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs) { - put_uobj_read(ind_table->uobject); + attrs->driver_udata = (struct ib_udata){}; + return &attrs->driver_udata; } -static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) +static struct ib_uverbs_completion_event_file * +_ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs) { - struct ib_uobject *uobj; + struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL, + fd, attrs); - uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context); - return uobj ? 
uobj->object : NULL; -} + if (IS_ERR(uobj)) + return (void *)uobj; -static void put_qp_read(struct ib_qp *qp) -{ - put_uobj_read(qp->uobject); -} + uverbs_uobject_get(uobj); + uobj_put_read(uobj); -static void put_qp_write(struct ib_qp *qp) -{ - put_uobj_write(qp->uobject); + return container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); } +#define ib_uverbs_lookup_comp_file(_fd, _ufile) ({ \ + CTASSERT(sizeof(_fd) == sizeof(s32)); \ + _ib_uverbs_lookup_comp_file(_fd, _ufile); \ +}) -static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) +int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs) { - return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); -} + struct ib_uverbs_file *ufile = attrs->ufile; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; -static void put_srq_read(struct ib_srq *srq) -{ - put_uobj_read(srq->uobject); -} + ib_dev = srcu_dereference(ufile->device->ib_dev, + &ufile->device->disassociate_srcu); + if (!ib_dev) + return -EIO; -static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, - struct ib_uobject **uobj) -{ - *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0); - return *uobj ? (*uobj)->object : NULL; -} + ucontext = rdma_zalloc_drv_obj(ib_dev, ib_ucontext); + if (!ucontext) + return -ENOMEM; -static void put_xrcd_read(struct ib_uobject *uobj) -{ - put_uobj_read(uobj); + ucontext->device = ib_dev; + ucontext->ufile = ufile; + xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC); + attrs->context = ucontext; + return 0; } -ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, - int in_len, int out_len) +int ib_init_ucontext(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_get_context cmd; - struct ib_uverbs_get_context_resp resp; - struct ib_udata udata; - struct ib_ucontext *ucontext; - struct file *filp; + struct ib_ucontext *ucontext = attrs->context; + struct ib_uverbs_file *file = attrs->ufile; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - mutex_lock(&file->mutex); - + if (!down_read_trylock(&file->hw_destroy_rwsem)) + return -EIO; + mutex_lock(&file->ucontext_lock); if (file->ucontext) { ret = -EINVAL; goto err; } - ib_uverbs_init_udata(&udata, buf + sizeof cmd, - u64_to_user_ptr(cmd.response + sizeof resp), - in_len - sizeof cmd, out_len - sizeof resp); - - ucontext = ib_dev->alloc_ucontext(ib_dev, &udata); - if (IS_ERR(ucontext)) { - ret = PTR_ERR(ucontext); - goto err; - } + ret = ucontext->device->alloc_ucontext(ucontext, + &attrs->driver_udata); + if (ret) + goto err_uncharge; - ucontext->device = ib_dev; - INIT_LIST_HEAD(&ucontext->pd_list); - INIT_LIST_HEAD(&ucontext->mr_list); - INIT_LIST_HEAD(&ucontext->mw_list); - INIT_LIST_HEAD(&ucontext->cq_list); - INIT_LIST_HEAD(&ucontext->qp_list); - INIT_LIST_HEAD(&ucontext->srq_list); - INIT_LIST_HEAD(&ucontext->ah_list); - INIT_LIST_HEAD(&ucontext->wq_list); - INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list); - INIT_LIST_HEAD(&ucontext->xrcd_list); - INIT_LIST_HEAD(&ucontext->rule_list); - rcu_read_lock(); - ucontext->tgid = get_pid(task_pid_group_leader(current)); - rcu_read_unlock(); - ucontext->closing = 0; - ucontext->cleanup_retryable = false; - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - ucontext->umem_tree = RB_ROOT; - init_rwsem(&ucontext->umem_rwsem); - ucontext->odp_mrs_count = 0; - INIT_LIST_HEAD(&ucontext->no_private_counters); - - if 
(!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) - ucontext->invalidate_range = NULL; - -#endif - - resp.num_comp_vectors = file->device->num_comp_vectors; - - ret = get_unused_fd_flags(O_CLOEXEC); - if (ret < 0) - goto err_free; - resp.async_fd = ret; + /* + * Make sure that ib_uverbs_get_ucontext() sees the pointer update + * only after all writes to setup the ucontext have completed + */ + atomic_store_rel_ptr((uintptr_t *)&file->ucontext, (uintptr_t)ucontext); - filp = ib_uverbs_alloc_event_file(file, ib_dev, 1); - if (IS_ERR(filp)) { - ret = PTR_ERR(filp); - goto err_fd; - } + mutex_unlock(&file->ucontext_lock); + up_read(&file->hw_destroy_rwsem); + return 0; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_file; - } +err_uncharge: +err: + mutex_unlock(&file->ucontext_lock); + up_read(&file->hw_destroy_rwsem); + return ret; +} - file->ucontext = ucontext; +static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_get_context_resp resp; + struct ib_uverbs_get_context cmd; + struct ib_device *ib_dev; + struct ib_uobject *uobj; + int ret; - fd_install(resp.async_fd, filp); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - mutex_unlock(&file->mutex); + ret = ib_alloc_ucontext(attrs); + if (ret) + return ret; - return in_len; + uobj = uobj_alloc(UVERBS_OBJECT_ASYNC_EVENT, attrs, &ib_dev); + if (IS_ERR(uobj)) { + ret = PTR_ERR(uobj); + goto err_ucontext; + } -err_file: - ib_uverbs_free_async_event_file(file); - fput(filp); + resp = (struct ib_uverbs_get_context_resp){ + .num_comp_vectors = attrs->ufile->device->num_comp_vectors, + .async_fd = uobj->id, + }; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) + goto err_uobj; -err_fd: - put_unused_fd(resp.async_fd); + ret = ib_init_ucontext(attrs); + if (ret) + goto err_uobj; -err_free: - put_pid(ucontext->tgid); - ib_dev->dealloc_ucontext(ucontext); + ib_uverbs_init_async_event_file( + container_of(uobj, struct ib_uverbs_async_event_file, uobj)); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; -err: - mutex_unlock(&file->mutex); +err_uobj: + rdma_alloc_abort_uobject(uobj, attrs); +err_ucontext: + kfree(attrs->context); + attrs->context = NULL; return ret; } -static void copy_query_dev_fields(struct ib_uverbs_file *file, - struct ib_device *ib_dev, +static void copy_query_dev_fields(struct ib_ucontext *ucontext, struct ib_uverbs_query_device_resp *resp, struct ib_device_attr *attr) { + struct ib_device *ib_dev = ucontext->device; + resp->fw_ver = attr->fw_ver; resp->node_guid = ib_dev->node_guid; resp->sys_image_guid = attr->sys_image_guid; @@ -436,8 +335,8 @@ resp->hw_ver = attr->hw_ver; resp->max_qp = attr->max_qp; resp->max_qp_wr = attr->max_qp_wr; - resp->device_cap_flags = (u32)(attr->device_cap_flags); - resp->max_sge = attr->max_sge; + resp->device_cap_flags = (u32)attr->device_cap_flags; + resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge); resp->max_sge_rd = attr->max_sge_rd; resp->max_cq = attr->max_cq; resp->max_cqe = attr->max_cqe; @@ -468,113 +367,75 @@ resp->phys_port_cnt = ib_dev->phys_port_cnt; } -ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_query_device(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_query_device cmd; struct ib_uverbs_query_device_resp resp; + struct ib_ucontext *ucontext; + int ret; - if (out_len < sizeof resp) - return 
-ENOSPC; + ucontext = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; memset(&resp, 0, sizeof resp); - copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs); - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; + copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs); - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_query_port cmd; struct ib_uverbs_query_port_resp resp; struct ib_port_attr attr; int ret; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; + ucontext = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; ret = ib_query_port(ib_dev, cmd.port_num, &attr); if (ret) return ret; memset(&resp, 0, sizeof resp); + copy_port_attr_to_resp(&attr, &resp, ib_dev, cmd.port_num); - resp.state = attr.state; - resp.max_mtu = attr.max_mtu; - resp.active_mtu = attr.active_mtu; - resp.gid_tbl_len = attr.gid_tbl_len; - resp.port_cap_flags = attr.port_cap_flags; - resp.max_msg_sz = attr.max_msg_sz; - resp.bad_pkey_cntr = attr.bad_pkey_cntr; - resp.qkey_viol_cntr = attr.qkey_viol_cntr; - resp.pkey_tbl_len = attr.pkey_tbl_len; - resp.lid = attr.lid; - resp.sm_lid = attr.sm_lid; - resp.lmc = attr.lmc; - resp.max_vl_num = attr.max_vl_num; - resp.sm_sl = attr.sm_sl; - resp.subnet_timeout = attr.subnet_timeout; - resp.init_type_reply = attr.init_type_reply; - resp.active_width = attr.active_width; - resp.active_speed = attr.active_speed; - resp.phys_state = attr.phys_state; - resp.link_layer = rdma_port_get_link_layer(ib_dev, - cmd.port_num); - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_alloc_pd cmd; struct ib_uverbs_alloc_pd_resp resp; - struct ib_udata udata; struct ib_uobject *uobj; struct ib_pd *pd; int ret; + struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof cmd, - u64_to_user_ptr(cmd.response + sizeof resp), - in_len - sizeof cmd, out_len - sizeof resp); - - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - init_uobj(uobj, 0, file->ucontext, &pd_lock_class); - down_write(&uobj->mutex); + uobj = uobj_alloc(UVERBS_OBJECT_PD, attrs, &ib_dev); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata); - if (IS_ERR(pd)) { - ret = PTR_ERR(pd); + pd = rdma_zalloc_drv_obj(ib_dev, ib_pd); + if (!pd) { + ret = -ENOMEM; goto err; } @@ -583,85 +444,41 
@@ pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); - uobj->object = pd; - ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj); + ret = ib_dev->alloc_pd(pd, &attrs->driver_udata); if (ret) - goto err_idr; + goto err_alloc; + uobj->object = pd; memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->pd_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - up_write(&uobj->mutex); - - return in_len; + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); - -err_idr: - ib_dealloc_pd(pd); - + ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs)); + pd = NULL; +err_alloc: + kfree(pd); err: - put_uobj_write(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } -ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_dealloc_pd(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_dealloc_pd cmd; - struct ib_uobject *uobj; - struct ib_pd *pd; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); - if (!uobj) - return -EINVAL; - pd = uobj->object; - - if (atomic_read(&pd->usecnt)) { - ret = -EBUSY; - goto err_put; - } + int ret; - ret = pd->device->dealloc_pd(uobj->object); - WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) - goto err_put; - - uobj->live = 0; - put_uobj_write(uobj); - - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; + return ret; -err_put: - put_uobj_write(uobj); - return ret; + return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, attrs); } struct xrcd_table_entry { @@ -749,31 +566,23 @@ } } -ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_device *ibudev = attrs->ufile->device; struct ib_uverbs_open_xrcd cmd; struct ib_uverbs_open_xrcd_resp resp; - struct ib_udata udata; struct ib_uxrcd_object *obj; struct ib_xrcd *xrcd = NULL; struct vnode *vnode = NULL; int ret = 0; int new_xrcd = 0; + struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof cmd, - u64_to_user_ptr(cmd.response + sizeof resp), - in_len - sizeof cmd, out_len - sizeof resp); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - mutex_lock(&file->device->xrcd_tree_mutex); + mutex_lock(&ibudev->xrcd_tree_mutex); if (cmd.fd != -1) { /* search for file descriptor */ @@ -781,7 +590,7 @@ if (ret != 0) goto err_tree_mutex_unlock; - xrcd = find_xrcd(file->device, vnode); + xrcd = find_xrcd(ibudev, vnode); if (!xrcd && !(cmd.oflags & O_CREAT)) { /* no file descriptor. 
Need CREATE flag */ ret = -EAGAIN; @@ -794,18 +603,15 @@ } } - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) { - ret = -ENOMEM; + obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, attrs, + &ib_dev); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); goto err_tree_mutex_unlock; } - init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class); - - down_write(&obj->uobject.mutex); - if (!xrcd) { - xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata); + xrcd = ib_dev->alloc_xrcd(ib_dev, &attrs->driver_udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; @@ -821,167 +627,103 @@ atomic_set(&obj->refcnt, 0); obj->uobject.object = xrcd; - ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); - if (ret) - goto err_idr; - memset(&resp, 0, sizeof resp); resp.xrcd_handle = obj->uobject.id; if (vnode != NULL) { if (new_xrcd) { /* create new vnode/xrcd table entry */ - ret = xrcd_table_insert(file->device, vnode, xrcd); + ret = xrcd_table_insert(ibudev, vnode, xrcd); if (ret) - goto err_insert_xrcd; + goto err_dealloc_xrcd; } atomic_inc(&xrcd->usecnt); } - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } if (vnode != NULL) vrele(vnode); - mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); - mutex_unlock(&file->mutex); - - obj->uobject.live = 1; - up_write(&obj->uobject.mutex); + mutex_unlock(&ibudev->xrcd_tree_mutex); - mutex_unlock(&file->device->xrcd_tree_mutex); - return in_len; + rdma_alloc_commit_uobject(&obj->uobject, attrs); + return 0; err_copy: if (vnode != NULL) { if (new_xrcd) - xrcd_table_delete(file->device, vnode); + xrcd_table_delete(ibudev, vnode); atomic_dec(&xrcd->usecnt); } -err_insert_xrcd: - idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); - -err_idr: - ib_dealloc_xrcd(xrcd); +err_dealloc_xrcd: + ib_dealloc_xrcd(xrcd, uverbs_get_cleared_udata(attrs)); err: - put_uobj_write(&obj->uobject); + uobj_alloc_abort(&obj->uobject, attrs); err_tree_mutex_unlock: if (vnode != NULL) vrele(vnode); - mutex_unlock(&file->device->xrcd_tree_mutex); + mutex_unlock(&ibudev->xrcd_tree_mutex); return ret; } -ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_close_xrcd(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_close_xrcd cmd; - struct ib_uobject *uobj; - struct ib_xrcd *xrcd = NULL; - struct vnode *vnode = NULL; - struct ib_uxrcd_object *obj; - int live; - int ret = 0; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - mutex_lock(&file->device->xrcd_tree_mutex); - uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext); - if (!uobj) { - ret = -EINVAL; - goto out; - } - - xrcd = uobj->object; - vnode = xrcd->vnode; - obj = container_of(uobj, struct ib_uxrcd_object, uobject); - if (atomic_read(&obj->refcnt)) { - put_uobj_write(uobj); - ret = -EBUSY; - goto out; - } - - if (!vnode || atomic_dec_and_test(&xrcd->usecnt)) { - ret = ib_dealloc_xrcd(uobj->object); - if (!ret) - uobj->live = 0; - } - - live = uobj->live; - if (vnode && ret) - atomic_inc(&xrcd->usecnt); - - put_uobj_write(uobj); + int ret; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) - goto out; - - if (vnode && !live) - xrcd_table_delete(file->device, vnode); - - idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); - mutex_lock(&file->mutex); - list_del(&uobj->list); - 
mutex_unlock(&file->mutex); - - put_uobj(uobj); - ret = in_len; + return ret; -out: - mutex_unlock(&file->device->xrcd_tree_mutex); - return ret; + return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, attrs); } -void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, - struct ib_xrcd *xrcd) +int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct vnode *vnode; + int ret; + struct ib_uverbs_device *dev = attrs->ufile->device; vnode = xrcd->vnode; if (vnode && !atomic_dec_and_test(&xrcd->usecnt)) - return; + return 0; - ib_dealloc_xrcd(xrcd); + ret = ib_dealloc_xrcd(xrcd, &attrs->driver_udata); + + if (ib_is_destroy_retryable(ret, why, uobject)) { + atomic_inc(&xrcd->usecnt); + return ret; + } if (vnode) xrcd_table_delete(dev, vnode); + + return ret; } -ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_reg_mr cmd; struct ib_uverbs_reg_mr_resp resp; - struct ib_udata udata; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mr *mr; int ret; + struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof cmd, - u64_to_user_ptr(cmd.response + sizeof resp), - in_len - sizeof cmd, out_len - sizeof resp); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; @@ -990,14 +732,11 @@ if (ret) return ret; - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, 0, file->ucontext, &mr_lock_class); - down_write(&uobj->mutex); + uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_free; @@ -1013,7 +752,8 @@ } mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, - cmd.access_flags, &udata); + cmd.access_flags, + &attrs->driver_udata); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto err_put; @@ -1021,74 +761,52 @@ mr->device = pd->device; mr->pd = pd; + mr->type = IB_MR_TYPE_USER; + mr->dm = NULL; + mr->sig_attrs = NULL; mr->uobject = uobj; atomic_inc(&pd->usecnt); uobj->object = mr; - ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); - if (ret) - goto err_unreg; memset(&resp, 0, sizeof resp); resp.lkey = mr->lkey; resp.rkey = mr->rkey; resp.mr_handle = uobj->id; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } - - put_pd_read(pd); - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->mr_list); - mutex_unlock(&file->mutex); + uobj_put_obj_read(pd); - uobj->live = 1; - - up_write(&uobj->mutex); - - return in_len; + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - -err_unreg: - ib_dereg_mr(mr); + ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); err_put: - put_pd_read(pd); + uobj_put_obj_read(pd); err_free: - put_uobj_write(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } -ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - 
const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_rereg_mr cmd; struct ib_uverbs_rereg_mr_resp resp; - struct ib_udata udata; struct ib_pd *pd = NULL; struct ib_mr *mr; struct ib_pd *old_pd; int ret; struct ib_uobject *uobj; - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response + sizeof(resp)), - in_len - sizeof(cmd), out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) return -EINVAL; @@ -1098,14 +816,17 @@ (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; - uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, - file->ucontext); - - if (!uobj) - return -EINVAL; + uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); mr = uobj->object; + if (mr->dm) { + ret = -EINVAL; + goto put_uobjs; + } + if (cmd.flags & IB_MR_REREG_ACCESS) { ret = ib_check_mr_access(cmd.access_flags); if (ret) @@ -1113,7 +834,8 @@ } if (cmd.flags & IB_MR_REREG_PD) { - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, + attrs); if (!pd) { ret = -EINVAL; goto put_uobjs; @@ -1122,116 +844,76 @@ old_pd = mr->pd; ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start, - cmd.length, cmd.hca_va, - cmd.access_flags, pd, &udata); - if (!ret) { - if (cmd.flags & IB_MR_REREG_PD) { - atomic_inc(&pd->usecnt); - mr->pd = pd; - atomic_dec(&old_pd->usecnt); - } - } else { + cmd.length, cmd.hca_va, + cmd.access_flags, pd, + &attrs->driver_udata); + if (ret) goto put_uobj_pd; + + if (cmd.flags & IB_MR_REREG_PD) { + atomic_inc(&pd->usecnt); + mr->pd = pd; + atomic_dec(&old_pd->usecnt); } memset(&resp, 0, sizeof(resp)); resp.lkey = mr->lkey; resp.rkey = mr->rkey; - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) - ret = -EFAULT; - else - ret = in_len; + ret = uverbs_response(attrs, &resp, sizeof(resp)); put_uobj_pd: if (cmd.flags & IB_MR_REREG_PD) - put_pd_read(pd); + uobj_put_obj_read(pd); put_uobjs: - - put_uobj_write(mr->uobject); + uobj_put_write(uobj); return ret; } -ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_dereg_mr cmd; - struct ib_mr *mr; - struct ib_uobject *uobj; - int ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext); - if (!uobj) - return -EINVAL; - - mr = uobj->object; - - ret = ib_dereg_mr(mr); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + int ret; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; + return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, attrs); } -ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_alloc_mw cmd; struct ib_uverbs_alloc_mw_resp resp; struct 
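/*
 * [review aside] Every destroy-style command now collapses to the same
 * two steps; the idr removal, file->mutex list surgery and final
 * put_uobj() all live behind uobj_perform_destroy().  Mirroring
 * ib_uverbs_dereg_mr() above, shown in isolation:
 */
static int destroy_handler_shape(struct uverbs_attr_bundle *attrs)
{
	struct ib_uverbs_dereg_mr cmd;
	int ret;

	ret = uverbs_request(attrs, &cmd, sizeof(cmd));
	if (ret)
		return ret;

	/* looks up the handle, runs the type's cleanup, drops the uobject */
	return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
}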
ib_uobject *uobj; struct ib_pd *pd; struct ib_mw *mw; - struct ib_udata udata; int ret; + struct ib_device *ib_dev; - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) - return -ENOMEM; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - init_uobj(uobj, 0, file->ucontext, &mw_lock_class); - down_write(&uobj->mutex); + uobj = uobj_alloc(UVERBS_OBJECT_MW, attrs, &ib_dev); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_free; } - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response + sizeof(resp)), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + if (cmd.mw_type != IB_MW_TYPE_1 && cmd.mw_type != IB_MW_TYPE_2) { + ret = -EINVAL; + goto err_put; + } - mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata); + mw = pd->device->alloc_mw(pd, cmd.mw_type, &attrs->driver_udata); if (IS_ERR(mw)) { ret = PTR_ERR(mw); goto err_put; @@ -1243,264 +925,162 @@ atomic_inc(&pd->usecnt); uobj->object = mw; - ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj); - if (ret) - goto err_unalloc; memset(&resp, 0, sizeof(resp)); resp.rkey = mw->rkey; resp.mw_handle = uobj->id; - if (copy_to_user((void __user *)(unsigned long)cmd.response, - &resp, sizeof(resp))) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } - - put_pd_read(pd); - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->mw_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); - - return in_len; + uobj_put_obj_read(pd); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); - -err_unalloc: uverbs_dealloc_mw(mw); - err_put: - put_pd_read(pd); - + uobj_put_obj_read(pd); err_free: - put_uobj_write(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } -ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_dealloc_mw(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_dealloc_mw cmd; - struct ib_mw *mw; - struct ib_uobject *uobj; - int ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext); - if (!uobj) - return -EINVAL; - - mw = uobj->object; - - ret = uverbs_dealloc_mw(mw); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + int ret; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; + return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, attrs); } -ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_comp_channel cmd; struct ib_uverbs_create_comp_channel_resp resp; - struct file *filp; + struct ib_uobject *uobj; + struct ib_uverbs_completion_event_file *ev_file; + struct ib_device *ib_dev; int ret; - if (out_len < sizeof resp) - return -ENOSPC; + ret = 
uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, attrs, &ib_dev); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - ret = get_unused_fd_flags(O_CLOEXEC); - if (ret < 0) - return ret; - resp.fd = ret; + resp.fd = uobj->id; - filp = ib_uverbs_alloc_event_file(file, ib_dev, 0); - if (IS_ERR(filp)) { - put_unused_fd(resp.fd); - return PTR_ERR(filp); - } + ev_file = container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); + ib_uverbs_init_event_queue(&ev_file->ev_queue); - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - put_unused_fd(resp.fd); - fput(filp); - return -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) { + uobj_alloc_abort(uobj, attrs); + return ret; } - fd_install(resp.fd, filp); - return in_len; + rdma_alloc_commit_uobject(uobj, attrs); + return 0; } -static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw, - struct ib_uverbs_ex_create_cq *cmd, - size_t cmd_sz, - int (*cb)(struct ib_uverbs_file *file, - struct ib_ucq_object *obj, - struct ib_uverbs_ex_create_cq_resp *resp, - struct ib_udata *udata, - void *context), - void *context) +static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_ex_create_cq *cmd) { struct ib_ucq_object *obj; - struct ib_uverbs_event_file *ev_file = NULL; + struct ib_uverbs_completion_event_file *ev_file = NULL; struct ib_cq *cq; int ret; struct ib_uverbs_ex_create_cq_resp resp; struct ib_cq_init_attr attr = {}; + struct ib_device *ib_dev; - if (cmd->comp_vector >= file->device->num_comp_vectors) + if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors) return ERR_PTR(-EINVAL); - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return ERR_PTR(-ENOMEM); - - init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class); - down_write(&obj->uobject.mutex); + obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs, + &ib_dev); + if (IS_ERR(obj)) + return obj; if (cmd->comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel); - if (!ev_file) { - ret = -EINVAL; + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs); + if (IS_ERR(ev_file)) { + ret = PTR_ERR(ev_file); goto err; } } - obj->uverbs_file = file; - obj->comp_events_reported = 0; - obj->async_events_reported = 0; + obj->uevent.uobject.user_handle = cmd->user_handle; INIT_LIST_HEAD(&obj->comp_list); - INIT_LIST_HEAD(&obj->async_list); + INIT_LIST_HEAD(&obj->uevent.event_list); attr.cqe = cmd->cqe; attr.comp_vector = cmd->comp_vector; + attr.flags = cmd->flags; - if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) - attr.flags = cmd->flags; - - cq = ib_dev->create_cq(ib_dev, &attr, - file->ucontext, uhw); - if (IS_ERR(cq)) { - ret = PTR_ERR(cq); + cq = rdma_zalloc_drv_obj(ib_dev, ib_cq); + if (!cq) { + ret = -ENOMEM; goto err_file; } - cq->device = ib_dev; - cq->uobject = &obj->uobject; + cq->uobject = obj; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = ev_file; + cq->cq_context = ev_file ? 
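/*
 * [review aside] resp.fd can come straight from uobj->id here because
 * UVERBS_OBJECT_COMP_CHANNEL is an fd-backed uobject type: the fd is
 * reserved by uobj_alloc() and published or released by commit/abort,
 * which is why the get_unused_fd_flags()/fd_install()/put_unused_fd()
 * dance disappears.  The container_of() step only recovers the event
 * file that embeds the uobject; a standalone model of that idiom:
 */
#include <stddef.h>

struct uobj_model { int id; };
struct ev_file_model {
	int queue_state;		/* stand-in for ev_queue */
	struct uobj_model uobj;		/* embedded member */
};

static struct ev_file_model *ev_file_of(struct uobj_model *u)
{
	/* walk back from the embedded member to its container */
	return (struct ev_file_model *)
		((char *)u - offsetof(struct ev_file_model, uobj));
}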
&ev_file->ev_queue : NULL; atomic_set(&cq->usecnt, 0); - obj->uobject.object = cq; - ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject); + ret = ib_dev->create_cq(cq, &attr, &attrs->driver_udata); if (ret) goto err_free; + obj->uevent.uobject.object = cq; memset(&resp, 0, sizeof resp); - resp.base.cq_handle = obj->uobject.id; + resp.base.cq_handle = obj->uevent.uobject.id; resp.base.cqe = cq->cqe; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); - - ret = cb(file, obj, &resp, ucore, context); + ret = uverbs_response(attrs, &resp, sizeof(resp)); if (ret) goto err_cb; - mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); - mutex_unlock(&file->mutex); - - obj->uobject.live = 1; - - up_write(&obj->uobject.mutex); - + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); return obj; err_cb: - idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); - + ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); + cq = NULL; err_free: - ib_destroy_cq(cq); - + kfree(cq); err_file: if (ev_file) - ib_uverbs_release_ucq(file, ev_file, obj); + ib_uverbs_release_ucq(ev_file, obj); err: - put_uobj_write(&obj->uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return ERR_PTR(ret); } -static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file, - struct ib_ucq_object *obj, - struct ib_uverbs_ex_create_cq_resp *resp, - struct ib_udata *ucore, void *context) -{ - if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) - return -EFAULT; - - return 0; -} - -ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_cq cmd; struct ib_uverbs_ex_create_cq cmd_ex; - struct ib_uverbs_create_cq_resp resp; - struct ib_udata ucore; - struct ib_udata uhw; struct ib_ucq_object *obj; + int ret; - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - ib_uverbs_init_udata(&ucore, buf, - u64_to_user_ptr(cmd.response), sizeof(cmd), sizeof(resp)); - - ib_uverbs_init_udata(&uhw, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response + sizeof(resp)), - in_len - sizeof(cmd), out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; memset(&cmd_ex, 0, sizeof(cmd_ex)); cmd_ex.user_handle = cmd.user_handle; @@ -1508,44 +1088,19 @@ cmd_ex.comp_vector = cmd.comp_vector; cmd_ex.comp_channel = cmd.comp_channel; - obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex, - offsetof(typeof(cmd_ex), comp_channel) + - sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, - NULL); - - if (IS_ERR(obj)) - return PTR_ERR(obj); - - return in_len; -} - -static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file, - struct ib_ucq_object *obj, - struct ib_uverbs_ex_create_cq_resp *resp, - struct ib_udata *ucore, void *context) -{ - if (ib_copy_to_udata(ucore, resp, resp->response_length)) - return -EFAULT; - - return 0; + obj = create_cq(attrs, &cmd_ex); + return PTR_ERR_OR_ZERO(obj); } -int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_create_cq_resp resp; struct ib_uverbs_ex_create_cq cmd; struct ib_ucq_object *obj; - int err; - - if 
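/*
 * [review aside] Allocation responsibility flips in this hunk: the core
 * now rdma_zalloc_drv_obj()s the ib_cq and the driver's ->create_cq()
 * initializes it in place, returning 0 or -errno instead of a pointer.
 * A hypothetical driver-side shape under the new contract (drv_cq and
 * drv_init_hw_cq are placeholders, not part of this patch):
 */
struct drv_cq {
	struct ib_cq ibcq;	/* core allocates this, embedded first */
	/* ... driver/hardware state ... */
};

static int drv_create_cq(struct ib_cq *ibcq,
			 const struct ib_cq_init_attr *attr,
			 struct ib_udata *udata)
{
	struct drv_cq *cq = container_of(ibcq, struct drv_cq, ibcq);

	if (attr->flags)	/* no flags supported in this sketch */
		return -EINVAL;
	return drv_init_hw_cq(cq, attr->cqe, udata);	/* 0 or -errno */
}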
(ucore->inlen < sizeof(cmd)) - return -EINVAL; + int ret; - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); - if (err) - return err; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; if (cmd.comp_mask) return -EINVAL; @@ -1553,59 +1108,41 @@ if (cmd.reserved) return -EINVAL; - if (ucore->outlen < (offsetof(typeof(resp), response_length) + - sizeof(resp.response_length))) - return -ENOSPC; - - obj = create_cq(file, ib_dev, ucore, uhw, &cmd, - min(ucore->inlen, sizeof(cmd)), - ib_uverbs_ex_create_cq_cb, NULL); - - if (IS_ERR(obj)) - return PTR_ERR(obj); - - return 0; + obj = create_cq(attrs, &cmd); + return PTR_ERR_OR_ZERO(obj); } -ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_resize_cq cmd; - struct ib_uverbs_resize_cq_resp resp; - struct ib_udata udata; + struct ib_uverbs_resize_cq_resp resp = {}; struct ib_cq *cq; int ret = -EINVAL; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof cmd, - u64_to_user_ptr(cmd.response + sizeof resp), - in_len - sizeof cmd, out_len - sizeof resp); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) return -EINVAL; - ret = cq->device->resize_cq(cq, cmd.cqe, &udata); + ret = cq->device->resize_cq(cq, cmd.cqe, &attrs->driver_udata); if (ret) goto out; resp.cqe = cq->cqe; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp.cqe)) - ret = -EFAULT; - + ret = uverbs_response(attrs, &resp, sizeof(resp)); out: - put_cq_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); - return ret ? 
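/*
 * [review aside] The removed "ucore->outlen < offsetof(...)" guard and
 * the hand-built resp.response_length both move behind the new
 * helpers.  Judging by the call sites here (an assumption about the
 * helper, not something this hunk shows), uverbs_response_length()
 * reduces to clamping the response to what userspace provided:
 */
static inline size_t response_length_model(size_t user_outlen,
					   size_t resp_size)
{
	return user_outlen < resp_size ? user_outlen : resp_size;
}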
ret : in_len; + return ret; } -static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) +static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, + struct ib_wc *wc) { struct ib_uverbs_wc tmp; @@ -1614,7 +1151,7 @@ tmp.opcode = wc->opcode; tmp.vendor_err = wc->vendor_err; tmp.byte_len = wc->byte_len; - tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data; + tmp.ex.imm_data = wc->ex.imm_data; tmp.qp_num = wc->qp->qp_num; tmp.src_qp = wc->src_qp; tmp.wc_flags = wc->wc_flags; @@ -1631,10 +1168,7 @@ return 0; } -ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_poll_cq cmd; struct ib_uverbs_poll_cq_resp resp; @@ -1644,15 +1178,16 @@ struct ib_wc wc; int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) return -EINVAL; /* we copy a struct ib_uverbs_poll_cq_resp to user space */ - header_ptr = (void __user *)(unsigned long) cmd.response; + header_ptr = attrs->ucore.outbuf; data_ptr = header_ptr + sizeof resp; memset(&resp, 0, sizeof resp); @@ -1663,7 +1198,7 @@ if (!ret) break; - ret = copy_wc_to_user(data_ptr, &wc); + ret = copy_wc_to_user(cq->device, data_ptr, &wc); if (ret) goto out_put; @@ -1675,135 +1210,98 @@ ret = -EFAULT; goto out_put; } + ret = 0; - ret = in_len; + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CORE_OUT)) + ret = uverbs_output_written(attrs, UVERBS_ATTR_CORE_OUT); out_put: - put_cq_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } -ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_req_notify_cq cmd; struct ib_cq *cq; - int retval; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) return -EINVAL; - if (ib_req_notify_cq(cq, cmd.solicited_only ? - IB_CQ_SOLICITED : IB_CQ_NEXT_COMP) < 0) - retval = -ENXIO; - else - retval = in_len; - - put_cq_read(cq); + ib_req_notify_cq(cq, cmd.solicited_only ? 
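/*
 * [review aside] poll_cq keeps its two-cursor output layout: the fixed
 * ib_uverbs_poll_cq_resp header at attrs->ucore.outbuf, then one
 * ib_uverbs_wc packed after it per completion.  The header carries the
 * count, which is only known once polling stops, so it is written
 * last.  A standalone model of that serialization order:
 */
#include <stdint.h>
#include <string.h>

struct hdr_model { uint32_t count; };

static size_t pack_header_last(uint8_t *out, const uint32_t *items,
			       size_t avail, size_t max)
{
	struct hdr_model h = { 0 };
	uint8_t *data = out + sizeof(h);

	while (h.count < avail && h.count < max) {
		memcpy(data, &items[h.count], sizeof(items[0]));
		data += sizeof(items[0]);
		h.count++;
	}
	memcpy(out, &h, sizeof(h));	/* header written last */
	return (size_t)(data - out);
}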
+ IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - return retval; + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); + return 0; } -ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_cq cmd; struct ib_uverbs_destroy_cq_resp resp; struct ib_uobject *uobj; - struct ib_cq *cq; struct ib_ucq_object *obj; - struct ib_uverbs_event_file *ev_file; - int ret = -EINVAL; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); - if (!uobj) - return -EINVAL; - cq = uobj->object; - ev_file = cq->cq_context; - obj = container_of(cq->uobject, struct ib_ucq_object, uobject); - - ret = ib_destroy_cq(cq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + int ret; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; - idr_remove_uobj(&ib_uverbs_cq_idr, uobj); + uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_ucq(file, ev_file, obj); - - memset(&resp, 0, sizeof resp); + obj = container_of(uobj, struct ib_ucq_object, uevent.uobject); + memset(&resp, 0, sizeof(resp)); resp.comp_events_reported = obj->comp_events_reported; - resp.async_events_reported = obj->async_events_reported; + resp.async_events_reported = obj->uevent.events_reported; - put_uobj(uobj); + uobj_put_destroy(uobj); - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -static int create_qp(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw, - struct ib_uverbs_ex_create_qp *cmd, - size_t cmd_sz, - int (*cb)(struct ib_uverbs_file *file, - struct ib_uverbs_ex_create_qp_resp *resp, - struct ib_udata *udata), - void *context) +static int create_qp(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_ex_create_qp *cmd) { struct ib_uqp_object *obj; struct ib_device *device; struct ib_pd *pd = NULL; struct ib_xrcd *xrcd = NULL; - struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_uobject *xrcd_uobj = ERR_PTR(-ENOENT); struct ib_cq *scq = NULL, *rcq = NULL; struct ib_srq *srq = NULL; struct ib_qp *qp; - char *buf; struct ib_qp_init_attr attr = {}; struct ib_uverbs_ex_create_qp_resp resp; int ret; struct ib_rwq_ind_table *ind_tbl = NULL; bool has_sq = true; + struct ib_device *ib_dev; if (cmd->qp_type == IB_QPT_RAW_PACKET && priv_check(curthread, PRIV_NET_RAW) != 0) return -EPERM; - obj = kzalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; - - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, - &qp_lock_class); + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs, + &ib_dev); + if (IS_ERR(obj)) + return PTR_ERR(obj); + obj->uxrcd = NULL; + obj->uevent.uobject.user_handle = cmd->user_handle; mutex_init(&obj->mcast_lock); - down_write(&obj->uevent.uobject.mutex); - if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + - sizeof(cmd->rwq_ind_tbl_handle) && - (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { - ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle, - file->ucontext); + + if (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE) { + ind_tbl = 
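/*
 * [review aside] destroy_cq cannot use uobj_perform_destroy() because
 * it must report event counts that live in the uobject, so it splits
 * the teardown: uobj_get_destroy() runs the destruction, the counters
 * are copied out, and only then uobj_put_destroy() drops the memory.
 * The ordering matters; reading obj after uobj_put_destroy() would be
 * a use-after-free.  Condensed shape of the pattern used above:
 */
static int destroy_with_counters_shape(struct uverbs_attr_bundle *attrs,
				       u32 handle)
{
	struct ib_uverbs_destroy_cq_resp resp = {};
	struct ib_uobject *uobj;
	struct ib_ucq_object *obj;

	uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, handle, attrs);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	obj = container_of(uobj, struct ib_ucq_object, uevent.uobject);
	resp.comp_events_reported = obj->comp_events_reported;
	resp.async_events_reported = obj->uevent.events_reported;
	uobj_put_destroy(uobj);		/* obj is invalid past this line */

	return uverbs_response(attrs, &resp, sizeof(resp));
}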
uobj_get_obj_read(rwq_ind_table, + UVERBS_OBJECT_RWQ_IND_TBL, + cmd->rwq_ind_tbl_handle, attrs); if (!ind_tbl) { ret = -EINVAL; goto err_put; @@ -1812,12 +1310,6 @@ attr.rwq_ind_tbl = ind_tbl; } - if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) + - sizeof(cmd->reserved1)) && cmd->reserved1) { - ret = -EOPNOTSUPP; - goto err_put; - } - if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) { ret = -EINVAL; goto err_put; @@ -1827,8 +1319,15 @@ has_sq = false; if (cmd->qp_type == IB_QPT_XRC_TGT) { - xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, - &xrcd_uobj); + xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle, + attrs); + + if (IS_ERR(xrcd_uobj)) { + ret = -EINVAL; + goto err_put; + } + + xrcd = (struct ib_xrcd *)xrcd_uobj->object; if (!xrcd) { ret = -EINVAL; goto err_put; @@ -1840,9 +1339,9 @@ cmd->max_recv_sge = 0; } else { if (cmd->is_srq) { - srq = idr_read_srq(cmd->srq_handle, - file->ucontext); - if (!srq || srq->srq_type != IB_SRQT_BASIC) { + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, + cmd->srq_handle, attrs); + if (!srq || srq->srq_type == IB_SRQT_XRC) { ret = -EINVAL; goto err_put; } @@ -1850,8 +1349,9 @@ if (!ind_tbl) { if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = idr_read_cq(cmd->recv_cq_handle, - file->ucontext, 0); + rcq = uobj_get_obj_read( + cq, UVERBS_OBJECT_CQ, + cmd->recv_cq_handle, attrs); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1861,10 +1361,12 @@ } if (has_sq) - scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); + scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, + cmd->send_cq_handle, attrs); if (!ind_tbl) rcq = rcq ?: scq; - pd = idr_read_pd(cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, + attrs); if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; @@ -1874,7 +1376,6 @@ } attr.event_handler = ib_uverbs_qp_event_handler; - attr.qp_context = file; attr.send_cq = scq; attr.recv_cq = rcq; attr.srq = srq; @@ -1890,35 +1391,36 @@ attr.cap.max_recv_sge = cmd->max_recv_sge; attr.cap.max_inline_data = cmd->max_inline_data; - obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); - if (cmd_sz >= offsetof(typeof(*cmd), create_flags) + - sizeof(cmd->create_flags)) - attr.create_flags = cmd->create_flags; - + attr.create_flags = cmd->create_flags; if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_CROSS_CHANNEL | IB_QP_CREATE_MANAGED_SEND | IB_QP_CREATE_MANAGED_RECV | - IB_QP_CREATE_SCATTER_FCS)) { + IB_QP_CREATE_SCATTER_FCS | + IB_QP_CREATE_CVLAN_STRIPPING | + IB_QP_CREATE_SOURCE_QPN | + IB_QP_CREATE_PCI_WRITE_END_PADDING)) { ret = -EINVAL; goto err_put; } - buf = (char *)cmd + sizeof(*cmd); - if (cmd_sz > sizeof(*cmd)) - if (!(buf[0] == 0 && !memcmp(buf, buf + 1, - cmd_sz - sizeof(*cmd) - 1))) { - ret = -EINVAL; + if (attr.create_flags & IB_QP_CREATE_SOURCE_QPN) { + if (priv_check(curthread, PRIV_NET_RAW)) { + ret = -EPERM; goto err_put; } + attr.source_qpn = cmd->source_qpn; + } + if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = device->create_qp(pd, &attr, uhw); + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, + obj); if (IS_ERR(qp)) { ret = PTR_ERR(qp); @@ -1926,17 +1428,6 @@ } if (cmd->qp_type != IB_QPT_XRC_TGT) { - qp->real_qp = qp; - qp->device = device; - qp->pd = pd; - qp->send_cq = attr.send_cq; - qp->recv_cq = attr.recv_cq; - qp->srq = attr.srq; - qp->rwq_ind_tbl = ind_tbl; - qp->event_handler = attr.event_handler; - 
qp->qp_context = attr.qp_context; - qp->qp_type = attr.qp_type; - atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); if (attr.send_cq) atomic_inc(&attr.send_cq->usecnt); @@ -1948,14 +1439,10 @@ atomic_inc(&ind_tbl->usecnt); } else { /* It is done in _ib_create_qp for other QP types */ - qp->uobject = &obj->uevent.uobject; + qp->uobject = obj; } - qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; - ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - if (ret) - goto err_destroy; memset(&resp, 0, sizeof resp); resp.base.qpn = qp->qp_num; @@ -1965,11 +1452,9 @@ resp.base.max_recv_wr = attr.cap.max_recv_wr; resp.base.max_send_wr = attr.cap.max_send_wr; resp.base.max_inline_data = attr.cap.max_inline_data; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); - - ret = cb(file, &resp, ucore); + ret = uverbs_response(attrs, &resp, sizeof(resp)); if (ret) goto err_cb; @@ -1977,87 +1462,58 @@ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - put_xrcd_read(xrcd_uobj); + uobj_put_read(xrcd_uobj); } if (pd) - put_pd_read(pd); + uobj_put_obj_read(pd); if (scq) - put_cq_read(scq); + rdma_lookup_put_uobject(&scq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (rcq && rcq != scq) - put_cq_read(rcq); + rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (srq) - put_srq_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ind_tbl) - put_rwq_indirection_table_read(ind_tbl); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; - - up_write(&obj->uevent.uobject.mutex); + uobj_put_obj_read(ind_tbl); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); return 0; err_cb: - idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - -err_destroy: - ib_destroy_qp(qp); + ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); err_put: - if (xrcd) - put_xrcd_read(xrcd_uobj); + if (!IS_ERR(xrcd_uobj)) + uobj_put_read(xrcd_uobj); if (pd) - put_pd_read(pd); + uobj_put_obj_read(pd); if (scq) - put_cq_read(scq); + rdma_lookup_put_uobject(&scq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (rcq && rcq != scq) - put_cq_read(rcq); + rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (srq) - put_srq_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ind_tbl) - put_rwq_indirection_table_read(ind_tbl); + uobj_put_obj_read(ind_tbl); - put_uobj_write(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return ret; } -static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file, - struct ib_uverbs_ex_create_qp_resp *resp, - struct ib_udata *ucore) -{ - if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) - return -EFAULT; - - return 0; -} - -ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_qp(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_qp cmd; struct ib_uverbs_ex_create_qp cmd_ex; - struct ib_udata ucore; - struct ib_udata uhw; - ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp); - int err; - - if (out_len < resp_size) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return 
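/*
 * [review aside] Seeding xrcd_uobj with ERR_PTR(-ENOENT) instead of
 * uninitialized_var() is what lets the shared err_put path test
 * IS_ERR(xrcd_uobj) safely for every QP type, not just XRC_TGT.  A
 * standalone illustration of the sentinel pattern (ERR_PTR/IS_ERR
 * re-derived here only for the example; the kernel encodes errnos in
 * the top 4095 pointer values):
 */
#include <stdint.h>

#define MAX_ERRNO_MODEL 4095

static inline void *err_ptr_model(long error)
{
	return (void *)error;		/* -ENOENT etc. as a pointer */
}

static inline int is_err_model(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO_MODEL;
}

/* resource stays err_ptr_model(-ENOENT) until a lookup succeeds, so a
 * single unwind path can do: if (!is_err_model(res)) put(res); */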
-EFAULT; + int ret; - ib_uverbs_init_udata(&ucore, buf, - u64_to_user_ptr(cmd.response), sizeof(cmd), resp_size); - ib_uverbs_init_udata(&uhw, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response + resp_size), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - resp_size); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; memset(&cmd_ex, 0, sizeof(cmd_ex)); cmd_ex.user_handle = cmd.user_handle; @@ -2074,43 +1530,17 @@ cmd_ex.qp_type = cmd.qp_type; cmd_ex.is_srq = cmd.is_srq; - err = create_qp(file, &ucore, &uhw, &cmd_ex, - offsetof(typeof(cmd_ex), is_srq) + - sizeof(cmd.is_srq), ib_uverbs_create_qp_cb, - NULL); - - if (err) - return err; - - return in_len; + return create_qp(attrs, &cmd_ex); } -static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file, - struct ib_uverbs_ex_create_qp_resp *resp, - struct ib_udata *ucore) +static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs) { - if (ib_copy_to_udata(ucore, resp, resp->response_length)) - return -EFAULT; - - return 0; -} - -int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) -{ - struct ib_uverbs_ex_create_qp_resp resp; - struct ib_uverbs_ex_create_qp cmd = {0}; - int err; - - if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) + - sizeof(cmd.comp_mask))) - return -EINVAL; + struct ib_uverbs_ex_create_qp cmd; + int ret; - err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); - if (err) - return err; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK) return -EINVAL; @@ -2118,119 +1548,104 @@ if (cmd.reserved) return -EINVAL; - if (ucore->outlen < (offsetof(typeof(resp), response_length) + - sizeof(resp.response_length))) - return -ENOSPC; - - err = create_qp(file, ucore, uhw, &cmd, - min(ucore->inlen, sizeof(cmd)), - ib_uverbs_ex_create_qp_cb, NULL); - - if (err) - return err; - - return 0; + return create_qp(attrs, &cmd); } -ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, int out_len) +static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_open_qp cmd; struct ib_uverbs_create_qp_resp resp; - struct ib_udata udata; struct ib_uqp_object *obj; struct ib_xrcd *xrcd; struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_qp *qp; - struct ib_qp_open_attr attr; + struct ib_qp_open_attr attr = {}; int ret; + struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof cmd, - u64_to_user_ptr(cmd.response + sizeof resp), - in_len - sizeof cmd, out_len - sizeof resp); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs, + &ib_dev); + if (IS_ERR(obj)) + return PTR_ERR(obj); - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); - down_write(&obj->uevent.uobject.mutex); + xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, attrs); + if (IS_ERR(xrcd_uobj)) { + ret = -EINVAL; + goto err_put; + } - xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + xrcd = (struct ib_xrcd *)xrcd_uobj->object; if (!xrcd) { ret = -EINVAL; - goto err_put; + goto err_xrcd; } attr.event_handler = 
ib_uverbs_qp_event_handler; - attr.qp_context = file; attr.qp_num = cmd.qpn; attr.qp_type = cmd.qp_type; - obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); qp = ib_open_qp(xrcd, &attr); if (IS_ERR(qp)) { ret = PTR_ERR(qp); - goto err_put; + goto err_xrcd; } - qp->uobject = &obj->uevent.uobject; - obj->uevent.uobject.object = qp; - ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - if (ret) - goto err_destroy; + obj->uevent.uobject.user_handle = cmd.user_handle; memset(&resp, 0, sizeof resp); resp.qpn = qp->qp_num; resp.qp_handle = obj->uevent.uobject.id; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_remove; - } + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) + goto err_destroy; obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - put_xrcd_read(xrcd_uobj); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); - mutex_unlock(&file->mutex); + qp->uobject = obj; + uobj_put_read(xrcd_uobj); - obj->uevent.uobject.live = 1; - - up_write(&obj->uevent.uobject.mutex); - - return in_len; - -err_remove: - idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_destroy: - ib_destroy_qp(qp); - + ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); +err_xrcd: + uobj_put_read(xrcd_uobj); err_put: - put_xrcd_read(xrcd_uobj); - put_uobj_write(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return ret; } -ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr, + struct ib_ah_attr *rdma_attr) +{ + uverb_attr->dlid = rdma_attr->dlid; + uverb_attr->sl = rdma_attr->sl; + uverb_attr->src_path_bits = rdma_attr->src_path_bits; + uverb_attr->static_rate = rdma_attr->static_rate; + uverb_attr->is_global = !!(rdma_attr->ah_flags & IB_AH_GRH); + if (uverb_attr->is_global) { + const struct ib_global_route *grh = &rdma_attr->grh; + + memcpy(uverb_attr->dgid, grh->dgid.raw, 16); + uverb_attr->flow_label = grh->flow_label; + uverb_attr->sgid_index = grh->sgid_index; + uverb_attr->hop_limit = grh->hop_limit; + uverb_attr->traffic_class = grh->traffic_class; + } + uverb_attr->port_num = rdma_attr->port_num; +} + +static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_query_qp cmd; struct ib_uverbs_query_qp_resp resp; @@ -2239,8 +1654,9 @@ struct ib_qp_init_attr *init_attr; int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; attr = kmalloc(sizeof *attr, GFP_KERNEL); init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); @@ -2249,7 +1665,7 @@ goto out; } - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) { ret = -EINVAL; goto out; @@ -2257,7 +1673,8 @@ ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); - put_qp_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) goto out; @@ -2286,29 +1703,8 @@ resp.alt_port_num = attr->alt_port_num; resp.alt_timeout = attr->alt_timeout; - memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); - resp.dest.flow_label = 
attr->ah_attr.grh.flow_label; - resp.dest.sgid_index = attr->ah_attr.grh.sgid_index; - resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; - resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; - resp.dest.dlid = attr->ah_attr.dlid; - resp.dest.sl = attr->ah_attr.sl; - resp.dest.src_path_bits = attr->ah_attr.src_path_bits; - resp.dest.static_rate = attr->ah_attr.static_rate; - resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH); - resp.dest.port_num = attr->ah_attr.port_num; - - memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); - resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; - resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; - resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; - resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; - resp.alt_dest.dlid = attr->alt_ah_attr.dlid; - resp.alt_dest.sl = attr->alt_ah_attr.sl; - resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; - resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; - resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); - resp.alt_dest.port_num = attr->alt_ah_attr.port_num; + copy_ah_attr_to_uverbs(&resp.dest, &attr->ah_attr); + copy_ah_attr_to_uverbs(&resp.alt_dest, &attr->alt_ah_attr); resp.max_send_wr = init_attr->cap.max_send_wr; resp.max_recv_wr = init_attr->cap.max_recv_wr; @@ -2317,15 +1713,13 @@ resp.max_inline_data = init_attr->cap.max_inline_data; resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); out: kfree(attr); kfree(init_attr); - return ret ? ret : in_len; + return ret; } /* Remove ignored fields set in the attribute mask */ @@ -2342,193 +1736,270 @@ } } -ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static void copy_ah_attr_from_uverbs(struct ib_device *dev, + struct ib_ah_attr *rdma_attr, + struct ib_uverbs_qp_dest *uverb_attr) { - struct ib_uverbs_modify_qp cmd; - struct ib_udata udata; - struct ib_qp *qp; - struct ib_qp_attr *attr; - int ret; + if (uverb_attr->is_global) { + struct ib_global_route *grh = &rdma_attr->grh; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + grh->flow_label = uverb_attr->flow_label; + grh->sgid_index = uverb_attr->sgid_index; + grh->hop_limit = uverb_attr->hop_limit; + grh->traffic_class = uverb_attr->traffic_class; + memcpy(grh->dgid.raw, uverb_attr->dgid, sizeof(grh->dgid)); + rdma_attr->ah_flags = IB_AH_GRH; + } else { + rdma_attr->ah_flags = 0; + } + rdma_attr->dlid = uverb_attr->dlid; + rdma_attr->sl = uverb_attr->sl; + rdma_attr->src_path_bits = uverb_attr->src_path_bits; + rdma_attr->static_rate = uverb_attr->static_rate; + rdma_attr->port_num = uverb_attr->port_num; +} - ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, - out_len); +static int modify_qp(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_ex_modify_qp *cmd) +{ + struct ib_qp_attr *attr; + struct ib_qp *qp; + int ret; - attr = kmalloc(sizeof *attr, GFP_KERNEL); + attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, + attrs); if (!qp) { ret = -EINVAL; goto out; } - if ((cmd.attr_mask & IB_QP_PORT) && - !rdma_is_port_valid(qp->device, cmd.port_num)) { + if 
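/*
 * [review aside] Eleven hand-rolled field copies per address vector
 * collapse into the copy_ah_attr_to_uverbs()/copy_ah_attr_from_uverbs()
 * pair added in this patch, and the GRH subset is now touched only when
 * is_global / IB_AH_GRH says it is valid.  The call sites stay
 * symmetric:
 *
 *	copy_ah_attr_to_uverbs(&resp.dest, &attr->ah_attr);
 *	copy_ah_attr_from_uverbs(qp->device, &attr->ah_attr,
 *				 &cmd->base.dest);
 */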
((cmd->base.attr_mask & IB_QP_PORT) && + !rdma_is_port_valid(qp->device, cmd->base.port_num)) { ret = -EINVAL; goto release_qp; } - if ((cmd.attr_mask & IB_QP_AV) && - !rdma_is_port_valid(qp->device, cmd.dest.port_num)) { - ret = -EINVAL; - goto release_qp; + if ((cmd->base.attr_mask & IB_QP_AV)) { + if (!rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) { + ret = -EINVAL; + goto release_qp; + } + + if (cmd->base.attr_mask & IB_QP_STATE && + cmd->base.qp_state == IB_QPS_RTR) { + /* We are in INIT->RTR TRANSITION (if we are not, + * this transition will be rejected in subsequent checks). + * In the INIT->RTR transition, we cannot have IB_QP_PORT set, + * but the IB_QP_STATE flag is required. + * + * Since kernel 3.14 (commit dbf727de7440), the uverbs driver, + * when IB_QP_AV is set, has required inclusion of a valid + * port number in the primary AV. (AVs are created and handled + * differently for infiniband and ethernet (RoCE) ports). + * + * Check the port number included in the primary AV against + * the port number in the qp struct, which was set (and saved) + * in the RST->INIT transition. + */ + if (cmd->base.dest.port_num != qp->real_qp->port) { + ret = -EINVAL; + goto release_qp; + } + } else { + /* We are in SQD->SQD. (If we are not, this transition will + * be rejected later in the verbs layer checks). + * Check for both IB_QP_PORT and IB_QP_AV, these can be set + * together in the SQD->SQD transition. + * + * If only IP_QP_AV was set, add in IB_QP_PORT as well (the + * verbs layer driver does not track primary port changes + * resulting from path migration. Thus, in SQD, if the primary + * AV is modified, the primary port should also be modified). + * + * Note that in this transition, the IB_QP_STATE flag + * is not allowed. + */ + if (((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT)) + == (IB_QP_AV | IB_QP_PORT)) && + cmd->base.port_num != cmd->base.dest.port_num) { + ret = -EINVAL; + goto release_qp; + } + if ((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT)) + == IB_QP_AV) { + cmd->base.attr_mask |= IB_QP_PORT; + cmd->base.port_num = cmd->base.dest.port_num; + } + } } - if ((cmd.attr_mask & IB_QP_ALT_PATH) && - (!rdma_is_port_valid(qp->device, cmd.alt_port_num) || - !rdma_is_port_valid(qp->device, cmd.alt_dest.port_num))) { + if ((cmd->base.attr_mask & IB_QP_ALT_PATH) && + (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) || + !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num) || + cmd->base.alt_port_num != cmd->base.alt_dest.port_num)) { ret = -EINVAL; goto release_qp; } - attr->qp_state = cmd.qp_state; - attr->cur_qp_state = cmd.cur_qp_state; - attr->path_mtu = cmd.path_mtu; - attr->path_mig_state = cmd.path_mig_state; - attr->qkey = cmd.qkey; - attr->rq_psn = cmd.rq_psn; - attr->sq_psn = cmd.sq_psn; - attr->dest_qp_num = cmd.dest_qp_num; - attr->qp_access_flags = cmd.qp_access_flags; - attr->pkey_index = cmd.pkey_index; - attr->alt_pkey_index = cmd.alt_pkey_index; - attr->en_sqd_async_notify = cmd.en_sqd_async_notify; - attr->max_rd_atomic = cmd.max_rd_atomic; - attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; - attr->min_rnr_timer = cmd.min_rnr_timer; - attr->port_num = cmd.port_num; - attr->timeout = cmd.timeout; - attr->retry_cnt = cmd.retry_cnt; - attr->rnr_retry = cmd.rnr_retry; - attr->alt_port_num = cmd.alt_port_num; - attr->alt_timeout = cmd.alt_timeout; - - memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); - attr->ah_attr.grh.flow_label = cmd.dest.flow_label; - attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; - 
attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; - attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; - attr->ah_attr.dlid = cmd.dest.dlid; - attr->ah_attr.sl = cmd.dest.sl; - attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; - attr->ah_attr.static_rate = cmd.dest.static_rate; - attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; - attr->ah_attr.port_num = cmd.dest.port_num; - - memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); - attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; - attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; - attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; - attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; - attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; - attr->alt_ah_attr.sl = cmd.alt_dest.sl; - attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; - attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; - attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; - attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; - - if (qp->real_qp == qp) { - if (cmd.attr_mask & IB_QP_AV) { - ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); - if (ret) - goto release_qp; - } - ret = qp->device->modify_qp(qp, attr, - modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); - } else { - ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); - } - - if (ret) + if ((cmd->base.attr_mask & IB_QP_CUR_STATE && + cmd->base.cur_qp_state > IB_QPS_ERR) || + (cmd->base.attr_mask & IB_QP_STATE && + cmd->base.qp_state > IB_QPS_ERR)) { + ret = -EINVAL; goto release_qp; + } - ret = in_len; + if (cmd->base.attr_mask & IB_QP_STATE) + attr->qp_state = cmd->base.qp_state; + if (cmd->base.attr_mask & IB_QP_CUR_STATE) + attr->cur_qp_state = cmd->base.cur_qp_state; + if (cmd->base.attr_mask & IB_QP_PATH_MTU) + attr->path_mtu = cmd->base.path_mtu; + if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE) + attr->path_mig_state = cmd->base.path_mig_state; + if (cmd->base.attr_mask & IB_QP_QKEY) + attr->qkey = cmd->base.qkey; + if (cmd->base.attr_mask & IB_QP_RQ_PSN) + attr->rq_psn = cmd->base.rq_psn; + if (cmd->base.attr_mask & IB_QP_SQ_PSN) + attr->sq_psn = cmd->base.sq_psn; + if (cmd->base.attr_mask & IB_QP_DEST_QPN) + attr->dest_qp_num = cmd->base.dest_qp_num; + if (cmd->base.attr_mask & IB_QP_ACCESS_FLAGS) + attr->qp_access_flags = cmd->base.qp_access_flags; + if (cmd->base.attr_mask & IB_QP_PKEY_INDEX) + attr->pkey_index = cmd->base.pkey_index; + if (cmd->base.attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) + attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify; + if (cmd->base.attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + attr->max_rd_atomic = cmd->base.max_rd_atomic; + if (cmd->base.attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic; + if (cmd->base.attr_mask & IB_QP_MIN_RNR_TIMER) + attr->min_rnr_timer = cmd->base.min_rnr_timer; + if (cmd->base.attr_mask & IB_QP_PORT) + attr->port_num = cmd->base.port_num; + if (cmd->base.attr_mask & IB_QP_TIMEOUT) + attr->timeout = cmd->base.timeout; + if (cmd->base.attr_mask & IB_QP_RETRY_CNT) + attr->retry_cnt = cmd->base.retry_cnt; + if (cmd->base.attr_mask & IB_QP_RNR_RETRY) + attr->rnr_retry = cmd->base.rnr_retry; + if (cmd->base.attr_mask & IB_QP_ALT_PATH) { + attr->alt_port_num = cmd->base.alt_port_num; + attr->alt_timeout = cmd->base.alt_timeout; + attr->alt_pkey_index = cmd->base.alt_pkey_index; + } + if (cmd->base.attr_mask & IB_QP_RATE_LIMIT) + attr->rate_limit = cmd->rate_limit; + + if (cmd->base.attr_mask 
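/*
 * [review aside] The old code copied every command field into
 * ib_qp_attr unconditionally; now each field is gated on its attr_mask
 * bit, so stale values in the command buffer can no longer leak into
 * attributes the caller never asked to modify.  The pattern in
 * miniature, with hypothetical flag names:
 */
#include <stdint.h>

enum { F_QKEY = 1u << 0, F_RQ_PSN = 1u << 1 };

struct attr_model { uint32_t qkey, rq_psn; };
struct cmd_model  { uint32_t mask, qkey, rq_psn; };

static void apply_masked(struct attr_model *a, const struct cmd_model *c)
{
	if (c->mask & F_QKEY)
		a->qkey = c->qkey;	/* copy only what the mask claims */
	if (c->mask & F_RQ_PSN)
		a->rq_psn = c->rq_psn;
}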
& IB_QP_AV) + copy_ah_attr_from_uverbs(qp->device, &attr->ah_attr, + &cmd->base.dest); + + if (cmd->base.attr_mask & IB_QP_ALT_PATH) + copy_ah_attr_from_uverbs(qp->device, &attr->alt_ah_attr, + &cmd->base.alt_dest); + + ret = ib_modify_qp_with_udata(qp, attr, + modify_qp_mask(qp->qp_type, + cmd->base.attr_mask), + &attrs->driver_udata); release_qp: - put_qp_read(qp); - + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); out: kfree(attr); return ret; } -ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_destroy_qp cmd; - struct ib_uverbs_destroy_qp_resp resp; - struct ib_uobject *uobj; - struct ib_qp *qp; - struct ib_uqp_object *obj; - int ret = -EINVAL; + struct ib_uverbs_ex_modify_qp cmd; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd.base, sizeof(cmd.base)); + if (ret) + return ret; - memset(&resp, 0, sizeof resp); + if (cmd.base.attr_mask & + ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) + return -EOPNOTSUPP; - uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext); - if (!uobj) - return -EINVAL; - qp = uobj->object; - obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); + return modify_qp(attrs, &cmd); +} - if (!list_empty(&obj->mcast_list)) { - put_uobj_write(uobj); - return -EBUSY; - } +static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_ex_modify_qp cmd; + struct ib_uverbs_ex_modify_qp_resp resp = { + .response_length = uverbs_response_length(attrs, sizeof(resp)) + }; + int ret; - ret = ib_destroy_qp(qp); - if (!ret) - uobj->live = 0; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - put_uobj_write(uobj); + /* + * Last bit is reserved for extending the attr_mask by + * using another field. 
+ */ + BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31)); + + if (cmd.base.attr_mask & + ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) + return -EOPNOTSUPP; + ret = modify_qp(attrs, &cmd); if (ret) return ret; - if (obj->uxrcd) - atomic_dec(&obj->uxrcd->refcnt); + return uverbs_response(attrs, &resp, sizeof(resp)); +} - idr_remove_uobj(&ib_uverbs_qp_idr, uobj); +static int ib_uverbs_destroy_qp(struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_destroy_qp cmd; + struct ib_uverbs_destroy_qp_resp resp; + struct ib_uobject *uobj; + struct ib_uqp_object *obj; + int ret; - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - ib_uverbs_release_uevent(file, &obj->uevent); + uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, attrs); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); + memset(&resp, 0, sizeof(resp)); resp.events_reported = obj->uevent.events_reported; - put_uobj(uobj); + uobj_put_destroy(uobj); - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } static void *alloc_wr(size_t wr_size, __u32 num_sge) { + if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof (struct ib_sge))) / + sizeof (struct ib_sge)) + return NULL; + return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) + num_sge * sizeof (struct ib_sge), GFP_KERNEL); -}; +} -ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_post_send cmd; struct ib_uverbs_post_send_resp resp; @@ -2538,33 +2009,41 @@ struct ib_qp *qp; int i, sg_ind; int is_ud; - ssize_t ret = -EINVAL; + int ret, ret2; size_t next_size; + const struct ib_sge __user *sgls; + const void __user *wqes; + struct uverbs_req_iter iter; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + - cmd.sge_count * sizeof (struct ib_uverbs_sge)) - return -EINVAL; - - if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) - return -EINVAL; + ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); + if (ret) + return ret; + wqes = uverbs_request_next_ptr(&iter, cmd.wqe_size * cmd.wr_count); + if (IS_ERR(wqes)) + return PTR_ERR(wqes); + sgls = uverbs_request_next_ptr( + &iter, cmd.sge_count * sizeof(struct ib_uverbs_sge)); + if (IS_ERR(sgls)) + return PTR_ERR(sgls); + ret = uverbs_request_finish(&iter); + if (ret) + return ret; user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); if (!user_wr) return -ENOMEM; - qp = idr_read_qp(cmd.qp_handle, file->ucontext); - if (!qp) + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); + if (!qp) { + ret = -EINVAL; goto out; + } is_ud = qp->qp_type == IB_QPT_UD; sg_ind = 0; last = NULL; for (i = 0; i < cmd.wr_count; ++i) { - if (copy_from_user(user_wr, - buf + sizeof cmd + i * cmd.wqe_size, + if (copy_from_user(user_wr, (const u8 *)wqes + i * cmd.wqe_size, cmd.wqe_size)) { ret = -EFAULT; goto out_put; @@ -2591,7 +2070,8 @@ goto out_put; } - ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); + ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, + user_wr->wr.ud.ah, attrs); if (!ud->ah) { kfree(ud); ret = -EINVAL; @@ -2671,11 +2151,9 @@ if (next->num_sge) { next->sg_list = (void *)((char *)next + 
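/*
 * [review aside] Two small defensive idioms land in this hunk.  The
 * attr_mask check rejects any bit above the last supported one, with
 * the BUILD_BUG_ON guarding the "<< 1" against overflowing out of the
 * 32-bit mask; the alloc_wr() guard is an integer-overflow check on
 * aligned_wr_size + num_sge * sizeof(struct ib_sge).  Both shown
 * standalone:
 */
#include <stdint.h>
#include <stdio.h>

/* ~((last << 1) - 1) keeps exactly the bits above `last` */
static int mask_in_range(uint32_t mask, uint32_t last_supported_bit)
{
	return (mask & ~((last_supported_bit << 1) - 1)) == 0;
}

/* rejects num_sge values that would wrap the 32-bit allocation size */
static int sge_count_fits(uint32_t aligned_wr_size, uint32_t num_sge,
			  uint32_t sge_size)
{
	return num_sge < (UINT32_MAX - aligned_wr_size) / sge_size;
}

int main(void)
{
	printf("%d\n", mask_in_range(0x3, 1u << 1));	    /* 1: in range */
	printf("%d\n", mask_in_range(0x8, 1u << 1));	    /* 0: too high */
	/* 16-byte SGEs after a 64-byte aligned WR header */
	printf("%d\n", sge_count_fits(64, 1000, 16));	    /* 1: fits    */
	printf("%d\n", sge_count_fits(64, UINT32_MAX, 16)); /* 0: wraps   */
	return 0;
}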
ALIGN(next_size, sizeof(struct ib_sge))); - if (copy_from_user(next->sg_list, - (const char *)buf + sizeof cmd + - cmd.wr_count * cmd.wqe_size + - sg_ind * sizeof (struct ib_sge), - next->num_sge * sizeof (struct ib_sge))) { + if (copy_from_user(next->sg_list, sgls + sg_ind, + next->num_sge * + sizeof(struct ib_sge))) { ret = -EFAULT; goto out_put; } @@ -2693,16 +2171,17 @@ break; } - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; + ret2 = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret2) + ret = ret2; out_put: - put_qp_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); while (wr) { if (is_ud && ud_wr(wr)->ah) - put_ah_read(ud_wr(wr)->ah); + uobj_put_obj_read(ud_wr(wr)->ah); next = wr->next; kfree(wr); wr = next; @@ -2711,28 +2190,35 @@ out: kfree(user_wr); - return ret ? ret : in_len; + return ret; } -static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, - int in_len, - u32 wr_count, - u32 sge_count, - u32 wqe_size) +static struct ib_recv_wr * +ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count, + u32 wqe_size, u32 sge_count) { struct ib_uverbs_recv_wr *user_wr; struct ib_recv_wr *wr = NULL, *last, *next; int sg_ind; int i; int ret; - - if (in_len < wqe_size * wr_count + - sge_count * sizeof (struct ib_uverbs_sge)) - return ERR_PTR(-EINVAL); + const struct ib_sge __user *sgls; + const void __user *wqes; if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) return ERR_PTR(-EINVAL); + wqes = uverbs_request_next_ptr(iter, wqe_size * wr_count); + if (IS_ERR(wqes)) + return ERR_CAST(wqes); + sgls = uverbs_request_next_ptr( + iter, sge_count * sizeof(struct ib_uverbs_sge)); + if (IS_ERR(sgls)) + return ERR_CAST(sgls); + ret = uverbs_request_finish(iter); + if (ret) + return ERR_PTR(ret); + user_wr = kmalloc(wqe_size, GFP_KERNEL); if (!user_wr) return ERR_PTR(-ENOMEM); @@ -2740,7 +2226,7 @@ sg_ind = 0; last = NULL; for (i = 0; i < wr_count; ++i) { - if (copy_from_user(user_wr, buf + i * wqe_size, + if (copy_from_user(user_wr, (const char *)wqes + i * wqe_size, wqe_size)) { ret = -EFAULT; goto err; @@ -2751,6 +2237,13 @@ goto err; } + if (user_wr->num_sge >= + (U32_MAX - ALIGN(sizeof *next, sizeof (struct ib_sge))) / + sizeof (struct ib_sge)) { + ret = -EINVAL; + goto err; + } + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + user_wr->num_sge * sizeof (struct ib_sge), GFP_KERNEL); @@ -2772,10 +2265,9 @@ if (next->num_sge) { next->sg_list = (void *)((char *)next + ALIGN(sizeof *next, sizeof (struct ib_sge))); - if (copy_from_user(next->sg_list, - (const char *)buf + wr_count * wqe_size + - sg_ind * sizeof (struct ib_sge), - next->num_sge * sizeof (struct ib_sge))) { + if (copy_from_user(next->sg_list, sgls + sg_ind, + next->num_sge * + sizeof(struct ib_sge))) { ret = -EFAULT; goto err; } @@ -2799,47 +2291,47 @@ return ERR_PTR(ret); } -ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_post_recv cmd; struct ib_uverbs_post_recv_resp resp; struct ib_recv_wr *wr, *next; const struct ib_recv_wr *bad_wr; struct ib_qp *qp; - ssize_t ret = -EINVAL; + int ret, ret2; + struct uverbs_req_iter iter; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); + if (ret) + return ret; - wr = ib_uverbs_unmarshall_recv(buf + sizeof 
cmd, - in_len - sizeof cmd, cmd.wr_count, - cmd.sge_count, cmd.wqe_size); + wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size, + cmd.sge_count); if (IS_ERR(wr)) return PTR_ERR(wr); - qp = idr_read_qp(cmd.qp_handle, file->ucontext); - if (!qp) + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); + if (!qp) { + ret = -EINVAL; goto out; + } resp.bad_wr = 0; ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); - put_qp_read(qp); - - if (ret) + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); + if (ret) { for (next = wr; next; next = next->next) { ++resp.bad_wr; if (next == bad_wr) break; } + } - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; - + ret2 = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret2) + ret = ret2; out: while (wr) { next = wr->next; @@ -2847,38 +2339,39 @@ wr = next; } - return ret ? ret : in_len; + return ret; } -ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_post_srq_recv cmd; struct ib_uverbs_post_srq_recv_resp resp; struct ib_recv_wr *wr, *next; const struct ib_recv_wr *bad_wr; struct ib_srq *srq; - ssize_t ret = -EINVAL; + int ret, ret2; + struct uverbs_req_iter iter; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); + if (ret) + return ret; - wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, - in_len - sizeof cmd, cmd.wr_count, - cmd.sge_count, cmd.wqe_size); + wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size, + cmd.sge_count); if (IS_ERR(wr)) return PTR_ERR(wr); - srq = idr_read_srq(cmd.srq_handle, file->ucontext); - if (!srq) + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); + if (!srq) { + ret = -EINVAL; goto out; + } resp.bad_wr = 0; ret = srq->device->post_srq_recv(srq, wr, &bad_wr); - put_srq_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) for (next = wr; next; next = next->next) { @@ -2887,9 +2380,9 @@ break; } - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; + ret2 = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret2) + ret = ret2; out: while (wr) { @@ -2898,154 +2391,103 @@ wr = next; } - return ret ? 
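/*
 * [review aside] The open-coded "buf + sizeof cmd + i * wqe_size"
 * arithmetic becomes a three-step iterator: uverbs_request_start() for
 * the fixed header, uverbs_request_next_ptr() for each sized trailing
 * array, uverbs_request_finish() to reject requests with leftover
 * bytes.  A userspace model of the bounds checking that protocol
 * implies (the real helpers hand back __user pointers; this only
 * models the cursor):
 */
#include <stdint.h>
#include <stddef.h>

struct req_iter_model { const uint8_t *cur, *end; };

static const void *iter_next(struct req_iter_model *it, size_t len)
{
	const void *p = it->cur;

	if (len > (size_t)(it->end - it->cur))
		return NULL;		/* request shorter than claimed */
	it->cur += len;
	return p;
}

static int iter_finish(const struct req_iter_model *it)
{
	return it->cur == it->end ? 0 : -1;	/* trailing junk: reject */
}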
ret : in_len; + return ret; } -ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_ah cmd; struct ib_uverbs_create_ah_resp resp; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_ah *ah; - struct ib_ah_attr attr; + struct ib_ah_attr attr = {}; int ret; - struct ib_udata udata; - - if (out_len < sizeof resp) - return -ENOSPC; + struct ib_device *ib_dev; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) - return -EINVAL; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response + sizeof(resp)), - in_len - sizeof(cmd), out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; + uobj = uobj_alloc(UVERBS_OBJECT_AH, attrs, &ib_dev); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); - down_write(&uobj->mutex); + if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) { + ret = -EINVAL; + goto err; + } - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err; } - attr.dlid = cmd.attr.dlid; - attr.sl = cmd.attr.sl; - attr.src_path_bits = cmd.attr.src_path_bits; - attr.static_rate = cmd.attr.static_rate; - attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0; - attr.port_num = cmd.attr.port_num; - attr.grh.flow_label = cmd.attr.grh.flow_label; - attr.grh.sgid_index = cmd.attr.grh.sgid_index; - attr.grh.hop_limit = cmd.attr.grh.hop_limit; - attr.grh.traffic_class = cmd.attr.grh.traffic_class; - memset(&attr.dmac, 0, sizeof(attr.dmac)); - memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); - - ah = pd->device->create_ah(pd, &attr, &udata); + attr.dlid = cmd.attr.dlid; + attr.sl = cmd.attr.sl; + attr.src_path_bits = cmd.attr.src_path_bits; + attr.static_rate = cmd.attr.static_rate; + attr.port_num = cmd.attr.port_num; + + if (cmd.attr.is_global) { + struct ib_global_route *grh = &attr.grh; + + grh->flow_label = cmd.attr.grh.flow_label; + grh->sgid_index = cmd.attr.grh.sgid_index; + grh->hop_limit = cmd.attr.grh.hop_limit; + grh->traffic_class = cmd.attr.grh.traffic_class; + memcpy(grh->dgid.raw, cmd.attr.grh.dgid, sizeof(grh->dgid)); + attr.ah_flags = IB_AH_GRH; + } else { + attr.ah_flags = 0; + } + ah = ib_create_user_ah(pd, &attr, &attrs->driver_udata); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_put; } - ah->device = pd->device; - ah->pd = pd; - atomic_inc(&pd->usecnt); ah->uobject = uobj; + uobj->user_handle = cmd.user_handle; uobj->object = ah; - ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj); - if (ret) - goto err_destroy; - resp.ah_handle = uobj->id; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } - - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->ah_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - up_write(&uobj->mutex); - - return in_len; + uobj_put_obj_read(pd); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); - -err_destroy: - ib_destroy_ah(ah); + ib_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE, + 
uverbs_get_cleared_udata(attrs)); err_put: - put_pd_read(pd); + uobj_put_obj_read(pd); err: - put_uobj_write(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } -ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, int out_len) +static int ib_uverbs_destroy_ah(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_ah cmd; - struct ib_ah *ah; - struct ib_uobject *uobj; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext); - if (!uobj) - return -EINVAL; - ah = uobj->object; - - ret = ib_destroy_ah(ah); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + int ret; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; + return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, attrs); } -ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_attach_mcast cmd; struct ib_qp *qp; @@ -3053,14 +2495,15 @@ struct ib_uverbs_mcast_entry *mcast; int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - qp = idr_write_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) return -EINVAL; - obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + obj = qp->uobject; mutex_lock(&obj->mcast_lock); list_for_each_entry(mcast, &obj->mcast_list, list) @@ -3087,31 +2530,30 @@ out_put: mutex_unlock(&obj->mcast_lock); - put_qp_write(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); - return ret ? 
ret : in_len; + return ret; } -ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_detach_mcast cmd; struct ib_uqp_object *obj; struct ib_qp *qp; struct ib_uverbs_mcast_entry *mcast; - int ret = -EINVAL; + int ret; bool found = false; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - qp = idr_write_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) return -EINVAL; - obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + obj = qp->uobject; mutex_lock(&obj->mcast_lock); list_for_each_entry(mcast, &obj->mcast_list, list) @@ -3132,18 +2574,151 @@ out_put: mutex_unlock(&obj->mcast_lock); - put_qp_write(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); + return ret; +} + +struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) +{ + struct ib_uflow_resources *resources; + + resources = kzalloc(sizeof(*resources), GFP_KERNEL); + + if (!resources) + return NULL; + + if (!num_specs) + goto out; + + resources->counters = + kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL); + resources->collection = + kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL); + + if (!resources->counters || !resources->collection) + goto err; + +out: + resources->max = num_specs; + return resources; + +err: + kfree(resources->counters); + kfree(resources); + + return NULL; +} +EXPORT_SYMBOL(flow_resources_alloc); + +void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res) +{ + unsigned int i; + + if (!uflow_res) + return; + + for (i = 0; i < uflow_res->collection_num; i++) + atomic_dec(&uflow_res->collection[i]->usecnt); + + for (i = 0; i < uflow_res->counters_num; i++) + atomic_dec(&uflow_res->counters[i]->usecnt); - return ret ? 
ret : in_len; + kfree(uflow_res->collection); + kfree(uflow_res->counters); + kfree(uflow_res); } +EXPORT_SYMBOL(ib_uverbs_flow_resources_free); -static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec) +void flow_resources_add(struct ib_uflow_resources *uflow_res, + enum ib_flow_spec_type type, + void *ibobj) { - /* Returns user space filter size, includes padding */ - return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2; + WARN_ON(uflow_res->num >= uflow_res->max); + + switch (type) { + case IB_FLOW_SPEC_ACTION_HANDLE: + atomic_inc(&((struct ib_flow_action *)ibobj)->usecnt); + uflow_res->collection[uflow_res->collection_num++] = + (struct ib_flow_action *)ibobj; + break; + case IB_FLOW_SPEC_ACTION_COUNT: + atomic_inc(&((struct ib_counters *)ibobj)->usecnt); + uflow_res->counters[uflow_res->counters_num++] = + (struct ib_counters *)ibobj; + break; + default: + WARN_ON(1); + } + + uflow_res->num++; +} +EXPORT_SYMBOL(flow_resources_add); + +static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_flow_spec *kern_spec, + union ib_flow_spec *ib_spec, + struct ib_uflow_resources *uflow_res) +{ + ib_spec->type = kern_spec->type; + switch (ib_spec->type) { + case IB_FLOW_SPEC_ACTION_TAG: + if (kern_spec->flow_tag.size != + sizeof(struct ib_uverbs_flow_spec_action_tag)) + return -EINVAL; + + ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag); + ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id; + break; + case IB_FLOW_SPEC_ACTION_DROP: + if (kern_spec->drop.size != + sizeof(struct ib_uverbs_flow_spec_action_drop)) + return -EINVAL; + + ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop); + break; + case IB_FLOW_SPEC_ACTION_HANDLE: + if (kern_spec->action.size != + sizeof(struct ib_uverbs_flow_spec_action_handle)) + return -EOPNOTSUPP; + ib_spec->action.act = uobj_get_obj_read(flow_action, + UVERBS_OBJECT_FLOW_ACTION, + kern_spec->action.handle, + attrs); + if (!ib_spec->action.act) + return -EINVAL; + ib_spec->action.size = + sizeof(struct ib_flow_spec_action_handle); + flow_resources_add(uflow_res, + IB_FLOW_SPEC_ACTION_HANDLE, + ib_spec->action.act); + uobj_put_obj_read(ib_spec->action.act); + break; + case IB_FLOW_SPEC_ACTION_COUNT: + if (kern_spec->flow_count.size != + sizeof(struct ib_uverbs_flow_spec_action_count)) + return -EINVAL; + ib_spec->flow_count.counters = + uobj_get_obj_read(counters, + UVERBS_OBJECT_COUNTERS, + kern_spec->flow_count.handle, + attrs); + if (!ib_spec->flow_count.counters) + return -EINVAL; + ib_spec->flow_count.size = + sizeof(struct ib_flow_spec_action_count); + flow_resources_add(uflow_res, + IB_FLOW_SPEC_ACTION_COUNT, + ib_spec->flow_count.counters); + uobj_put_obj_read(ib_spec->flow_count.counters); + break; + default: + return -EINVAL; + } + return 0; } -static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size, +static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size, u16 ib_real_filter_sz) { /* @@ -3152,7 +2727,7 @@ */ if (kern_filter_size > ib_real_filter_sz) { - if (memchr_inv((char *)kern_spec_filter + + if (memchr_inv((const char *)kern_spec_filter + ib_real_filter_sz, 0, kern_filter_size - ib_real_filter_sz)) return -EINVAL; @@ -3161,30 +2736,25 @@ return kern_filter_size; } -static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, - union ib_flow_spec *ib_spec) +int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, + const void *kern_spec_mask, + const void *kern_spec_val, + size_t 
kern_filter_sz, + union ib_flow_spec *ib_spec) { ssize_t actual_filter_sz; - ssize_t kern_filter_sz; ssize_t ib_filter_sz; - void *kern_spec_mask; - void *kern_spec_val; - - if (kern_spec->reserved) - return -EINVAL; - - ib_spec->type = kern_spec->type; - kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); /* User flow spec size must be aligned to 4 bytes */ if (kern_filter_sz != ALIGN(kern_filter_sz, 4)) return -EINVAL; - kern_spec_val = (char *)kern_spec + - sizeof(struct ib_uverbs_flow_spec_hdr); - kern_spec_mask = (char *)kern_spec_val + kern_filter_sz; + ib_spec->type = type; - switch (ib_spec->type) { + if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL)) + return -EINVAL; + + switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { case IB_FLOW_SPEC_ETH: ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz); actual_filter_sz = spec_filter_size(kern_spec_mask, @@ -3222,17 +2792,65 @@ (ntohl(ib_spec->ipv6.val.flow_label)) >= BIT(20)) return -EINVAL; break; - case IB_FLOW_SPEC_TCP: - case IB_FLOW_SPEC_UDP: - ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz); + case IB_FLOW_SPEC_TCP: + case IB_FLOW_SPEC_UDP: + ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp); + memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz); + break; + case IB_FLOW_SPEC_VXLAN_TUNNEL: + ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->tunnel.size = sizeof(struct ib_flow_spec_tunnel); + memcpy(&ib_spec->tunnel.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->tunnel.mask, kern_spec_mask, actual_filter_sz); + + if ((ntohl(ib_spec->tunnel.mask.tunnel_id)) >= BIT(24) || + (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24)) + return -EINVAL; + break; + case IB_FLOW_SPEC_ESP: + ib_filter_sz = offsetof(struct ib_flow_esp_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->esp.size = sizeof(struct ib_flow_spec_esp); + memcpy(&ib_spec->esp.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz); + break; + case IB_FLOW_SPEC_GRE: + ib_filter_sz = offsetof(struct ib_flow_gre_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->gre.size = sizeof(struct ib_flow_spec_gre); + memcpy(&ib_spec->gre.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->gre.mask, kern_spec_mask, actual_filter_sz); + break; + case IB_FLOW_SPEC_MPLS: + ib_filter_sz = offsetof(struct ib_flow_mpls_filter, real_sz); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); if (actual_filter_sz <= 0) return -EINVAL; - ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp); - memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz); - memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz); + ib_spec->mpls.size = sizeof(struct ib_flow_spec_mpls); + memcpy(&ib_spec->mpls.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->mpls.mask, kern_spec_mask, actual_filter_sz); 
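+	/*
+	 * Every filter case above follows the same contract:
+	 * spec_filter_size() accepts a user filter larger than the
+	 * kernel's real_sz only when all trailing bytes are zero, and
+	 * val/mask are then copied with exactly actual_filter_sz bytes.
+	 */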
break; default: return -EINVAL; @@ -3240,12 +2858,46 @@ return 0; } -int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, + union ib_flow_spec *ib_spec) +{ + size_t kern_filter_sz; + void *kern_spec_mask; + void *kern_spec_val; + + if (kern_spec->hdr.size < sizeof(struct ib_uverbs_flow_spec_hdr)) + return -EINVAL; + kern_filter_sz = kern_spec->hdr.size - sizeof(struct ib_uverbs_flow_spec_hdr); + kern_filter_sz /= 2; + + kern_spec_val = (u8 *)kern_spec + + sizeof(struct ib_uverbs_flow_spec_hdr); + kern_spec_mask = (u8 *)kern_spec_val + kern_filter_sz; + + return ib_uverbs_kern_spec_to_ib_spec_filter(kern_spec->type, + kern_spec_mask, + kern_spec_val, + kern_filter_sz, ib_spec); +} + +static int kern_spec_to_ib_spec(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_flow_spec *kern_spec, + union ib_flow_spec *ib_spec, + struct ib_uflow_resources *uflow_res) +{ + if (kern_spec->reserved) + return -EINVAL; + + if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG) + return kern_spec_to_ib_spec_action(attrs, kern_spec, ib_spec, + uflow_res); + else + return kern_spec_to_ib_spec_filter(kern_spec, ib_spec); +} + +static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_create_wq cmd = {}; + struct ib_uverbs_ex_create_wq cmd; struct ib_uverbs_ex_create_wq_resp resp = {}; struct ib_uwq_object *obj; int err = 0; @@ -3253,44 +2905,27 @@ struct ib_pd *pd; struct ib_wq *wq; struct ib_wq_init_attr wq_init_attr = {}; - size_t required_cmd_sz; - size_t required_resp_len; - - required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); - required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); - - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->outlen < required_resp_len) - return -ENOSPC; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; + struct ib_device *ib_dev; - err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + err = uverbs_request(attrs, &cmd, sizeof(cmd)); if (err) return err; if (cmd.comp_mask) return -EOPNOTSUPP; - obj = kmalloc(sizeof(*obj), GFP_KERNEL); - if (!obj) - return -ENOMEM; + obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, attrs, + &ib_dev); + if (IS_ERR(obj)) + return PTR_ERR(obj); - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, - &wq_lock_class); - down_write(&obj->uevent.uobject.mutex); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { err = -EINVAL; goto err_uobj; } - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) { err = -EINVAL; goto err_put_pd; @@ -3299,18 +2934,19 @@ wq_init_attr.cq = cq; wq_init_attr.max_sge = cmd.max_sge; wq_init_attr.max_wr = cmd.max_wr; - wq_init_attr.wq_context = file; + wq_init_attr.wq_context = attrs->ufile; wq_init_attr.wq_type = cmd.wq_type; wq_init_attr.event_handler = ib_uverbs_wq_event_handler; - obj->uevent.events_reported = 0; + wq_init_attr.create_flags = cmd.create_flags; INIT_LIST_HEAD(&obj->uevent.event_list); - wq = pd->device->create_wq(pd, &wq_init_attr, uhw); + + wq = pd->device->create_wq(pd, &wq_init_attr, &attrs->driver_udata); if (IS_ERR(wq)) { err = PTR_ERR(wq); goto err_put_cq; } - wq->uobject = 
&obj->uevent.uobject; + wq->uobject = obj; obj->uevent.uobject.object = wq; wq->wq_type = wq_init_attr.wq_type; wq->cq = cq; @@ -3320,166 +2956,106 @@ atomic_set(&wq->usecnt, 0); atomic_inc(&pd->usecnt); atomic_inc(&cq->usecnt); - wq->uobject = &obj->uevent.uobject; + wq->uobject = obj; obj->uevent.uobject.object = wq; - err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); - if (err) - goto destroy_wq; memset(&resp, 0, sizeof(resp)); resp.wq_handle = obj->uevent.uobject.id; resp.max_sge = wq_init_attr.max_sge; resp.max_wr = wq_init_attr.max_wr; resp.wqn = wq->wq_num; - resp.response_length = required_resp_len; - err = ib_copy_to_udata(ucore, - &resp, resp.response_length); + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + err = uverbs_response(attrs, &resp, sizeof(resp)); if (err) goto err_copy; - put_pd_read(pd); - put_cq_read(cq); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; - up_write(&obj->uevent.uobject.mutex); + uobj_put_obj_read(pd); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); -destroy_wq: - ib_destroy_wq(wq); + ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs)); err_put_cq: - put_cq_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_put_pd: - put_pd_read(pd); + uobj_put_obj_read(pd); err_uobj: - put_uobj_write(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return err; } -int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_destroy_wq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_destroy_wq cmd = {}; + struct ib_uverbs_ex_destroy_wq cmd; struct ib_uverbs_ex_destroy_wq_resp resp = {}; - struct ib_wq *wq; struct ib_uobject *uobj; struct ib_uwq_object *obj; - size_t required_cmd_sz; - size_t required_resp_len; int ret; - required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle); - required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); - - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->outlen < required_resp_len) - return -ENOSPC; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; if (cmd.comp_mask) return -EOPNOTSUPP; - resp.response_length = required_resp_len; - uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle, - file->ucontext); - if (!uobj) - return -EINVAL; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, attrs); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - wq = uobj->object; obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); - ret = ib_destroy_wq(wq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - if (ret) - return ret; - - idr_remove_uobj(&ib_uverbs_wq_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_uevent(file, &obj->uevent); resp.events_reported = obj->uevent.events_reported; - put_uobj(uobj); - ret = 
ib_copy_to_udata(ucore, &resp, resp.response_length); - if (ret) - return ret; + uobj_put_destroy(uobj); - return 0; + return uverbs_response(attrs, &resp, sizeof(resp)); } -int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_modify_wq cmd = {}; + struct ib_uverbs_ex_modify_wq cmd; struct ib_wq *wq; struct ib_wq_attr wq_attr = {}; - size_t required_cmd_sz; int ret; - required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state); - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; if (!cmd.attr_mask) return -EINVAL; - if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE)) + if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS)) return -EINVAL; - wq = idr_read_wq(cmd.wq_handle, file->ucontext); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs); if (!wq) return -EINVAL; wq_attr.curr_wq_state = cmd.curr_wq_state; wq_attr.wq_state = cmd.wq_state; - ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); - put_wq_read(wq); + if (cmd.attr_mask & IB_WQ_FLAGS) { + wq_attr.flags = cmd.flags; + wq_attr.flags_mask = cmd.flags_mask; + } + ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, + &attrs->driver_udata); + rdma_lookup_put_uobject(&wq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } -int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_create_rwq_ind_table cmd = {}; + struct ib_uverbs_ex_create_rwq_ind_table cmd; struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {}; struct ib_uobject *uobj; - int err = 0; + int err; struct ib_rwq_ind_table_init_attr init_attr = {}; struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_wq **wqs = NULL; @@ -3487,26 +3063,13 @@ struct ib_wq *wq = NULL; int i, j, num_read_wqs; u32 num_wq_handles; - u32 expected_in_size; - size_t required_cmd_sz_header; - size_t required_resp_len; - - required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); - required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num); - - if (ucore->inlen < required_cmd_sz_header) - return -EINVAL; - - if (ucore->outlen < required_resp_len) - return -ENOSPC; + struct uverbs_req_iter iter; + struct ib_device *ib_dev; - err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header); + err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); if (err) return err; - ucore->inbuf = (const char *)ucore->inbuf + required_cmd_sz_header; - ucore->inlen -= required_cmd_sz_header; - if (cmd.comp_mask) return -EOPNOTSUPP; @@ -3514,26 +3077,17 @@ return -EINVAL; num_wq_handles = 1 << cmd.log_ind_tbl_size; - expected_in_size = num_wq_handles * sizeof(__u32); - if (num_wq_handles == 1) - /* input size for wq handles is u64 aligned */ - expected_in_size += sizeof(__u32); - - if (ucore->inlen < expected_in_size) - return -EINVAL; - - if (ucore->inlen > expected_in_size && - !ib_is_udata_cleared(ucore, expected_in_size, - ucore->inlen - 
expected_in_size)) - return -EOPNOTSUPP; - wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles), GFP_KERNEL); if (!wqs_handles) return -ENOMEM; - err = ib_copy_from_udata(wqs_handles, ucore, - num_wq_handles * sizeof(__u32)); + err = uverbs_request_next(&iter, wqs_handles, + num_wq_handles * sizeof(__u32)); + if (err) + goto err_free; + + err = uverbs_request_finish(&iter); if (err) goto err_free; @@ -3545,7 +3099,8 @@ for (num_read_wqs = 0; num_read_wqs < num_wq_handles; num_read_wqs++) { - wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, + wqs_handles[num_read_wqs], attrs); if (!wq) { err = -EINVAL; goto put_wqs; @@ -3554,17 +3109,17 @@ wqs[num_read_wqs] = wq; } - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) { - err = -ENOMEM; + uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev); + if (IS_ERR(uobj)) { + err = PTR_ERR(uobj); goto put_wqs; } - init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class); - down_write(&uobj->mutex); init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; init_attr.ind_tbl = wqs; - rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); + + rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, + &attrs->driver_udata); if (IS_ERR(rwq_ind_tbl)) { err = PTR_ERR(rwq_ind_tbl); @@ -3581,108 +3136,54 @@ for (i = 0; i < num_wq_handles; i++) atomic_inc(&wqs[i]->usecnt); - err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); - if (err) - goto destroy_ind_tbl; - resp.ind_tbl_handle = uobj->id; resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; - resp.response_length = required_resp_len; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); - err = ib_copy_to_udata(ucore, - &resp, resp.response_length); + err = uverbs_response(attrs, &resp, sizeof(resp)); if (err) goto err_copy; kfree(wqs_handles); for (j = 0; j < num_read_wqs; j++) - put_wq_read(wqs[j]); + rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + rdma_alloc_commit_uobject(uobj, attrs); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); -destroy_ind_tbl: ib_destroy_rwq_ind_table(rwq_ind_tbl); err_uobj: - put_uobj_write(uobj); + uobj_alloc_abort(uobj, attrs); put_wqs: for (j = 0; j < num_read_wqs; j++) - put_wq_read(wqs[j]); + rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_free: kfree(wqs_handles); kfree(wqs); return err; } -int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; - struct ib_rwq_ind_table *rwq_ind_tbl; - struct ib_uobject *uobj; - int ret; - struct ib_wq **ind_tbl; - size_t required_cmd_sz; - - required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle); - - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; + struct ib_uverbs_ex_destroy_rwq_ind_table cmd; + int ret; - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; if (cmd.comp_mask) return 
-EOPNOTSUPP; - uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle, - file->ucontext); - if (!uobj) - return -EINVAL; - rwq_ind_tbl = uobj->object; - ind_tbl = rwq_ind_tbl->ind_tbl; - - ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; - - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - kfree(ind_tbl); - return ret; + return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL, + cmd.ind_tbl_handle, attrs); } -int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_flow cmd; struct ib_uverbs_create_flow_resp resp; @@ -3691,24 +3192,18 @@ struct ib_uverbs_flow_attr *kern_flow_attr; struct ib_flow_attr *flow_attr; struct ib_qp *qp; - int err = 0; - void *kern_spec; + struct ib_uflow_resources *uflow_res; + struct ib_uverbs_flow_spec_hdr *kern_spec; + struct uverbs_req_iter iter; + int err; void *ib_spec; int i; + struct ib_device *ib_dev; - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; - - if (ucore->outlen < sizeof(resp)) - return -ENOSPC; - - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); if (err) return err; - ucore->inbuf = (const char *)ucore->inbuf + sizeof(cmd); - ucore->inlen -= sizeof(cmd); - if (cmd.comp_mask) return -EINVAL; @@ -3726,8 +3221,7 @@ if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) return -EINVAL; - if (cmd.flow_attr.size > ucore->inlen || - cmd.flow_attr.size > + if (cmd.flow_attr.size > (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec))) return -EINVAL; @@ -3741,35 +3235,47 @@ if (!kern_flow_attr) return -ENOMEM; - memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); - err = ib_copy_from_udata(kern_flow_attr + 1, ucore, - cmd.flow_attr.size); + *kern_flow_attr = cmd.flow_attr; + err = uverbs_request_next(&iter, &kern_flow_attr->flow_specs, + cmd.flow_attr.size); if (err) goto err_free_attr; } else { kern_flow_attr = &cmd.flow_attr; } - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) { - err = -ENOMEM; + err = uverbs_request_finish(&iter); + if (err) + goto err_free_attr; + + uobj = uobj_alloc(UVERBS_OBJECT_FLOW, attrs, &ib_dev); + if (IS_ERR(uobj)) { + err = PTR_ERR(uobj); goto err_free_attr; } - init_uobj(uobj, 0, file->ucontext, &rule_lock_class); - down_write(&uobj->mutex); - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) { err = -EINVAL; goto err_uobj; } - flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs * - sizeof(union ib_flow_spec), GFP_KERNEL); + if (qp->qp_type != IB_QPT_UD && qp->qp_type != IB_QPT_RAW_PACKET) { + err = -EINVAL; + goto err_put; + } + + flow_attr = kzalloc(struct_size(flow_attr, flows, + cmd.flow_attr.num_of_specs), GFP_KERNEL); if (!flow_attr) { err = -ENOMEM; goto err_put; } + uflow_res = flow_resources_alloc(cmd.flow_attr.num_of_specs); + if (!uflow_res) { + err = -ENOMEM; + goto err_free_flow_attr; + } flow_attr->type = kern_flow_attr->type; flow_attr->priority = kern_flow_attr->priority; @@ -3778,20 +3284,23 @@ flow_attr->flags = kern_flow_attr->flags; flow_attr->size = sizeof(*flow_attr); - kern_spec = kern_flow_attr + 1; + kern_spec = 
kern_flow_attr->flow_specs; ib_spec = flow_attr + 1; for (i = 0; i < flow_attr->num_of_specs && - cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) && - cmd.flow_attr.size >= - ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) { - err = kern_spec_to_ib_spec(kern_spec, ib_spec); + cmd.flow_attr.size >= sizeof(*kern_spec) && + cmd.flow_attr.size >= kern_spec->size; + i++) { + err = kern_spec_to_ib_spec( + attrs, (struct ib_uverbs_flow_spec *)kern_spec, + ib_spec, uflow_res); if (err) goto err_free; + flow_attr->size += ((union ib_flow_spec *) ib_spec)->size; - cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size; - kern_spec = (char *)kern_spec + ((struct ib_uverbs_flow_spec *) kern_spec)->size; - ib_spec = (char *)ib_spec + ((union ib_flow_spec *)ib_spec)->size; + cmd.flow_attr.size -= kern_spec->size; + kern_spec = (struct ib_uverbs_flow_spec_hdr *)((u8 *)kern_spec + kern_spec->size); + ib_spec = (u8 *)ib_spec + ((union ib_flow_spec *) ib_spec)->size; } if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) { pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n", @@ -3799,100 +3308,65 @@ err = -EINVAL; goto err_free; } - flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); + + flow_id = qp->device->create_flow( + qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata); + if (IS_ERR(flow_id)) { err = PTR_ERR(flow_id); goto err_free; } - flow_id->qp = qp; - flow_id->uobject = uobj; - uobj->object = flow_id; - err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); - if (err) - goto destroy_flow; + ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res); memset(&resp, 0, sizeof(resp)); resp.flow_handle = uobj->id; - err = ib_copy_to_udata(ucore, - &resp, sizeof(resp)); + err = uverbs_response(attrs, &resp, sizeof(resp)); if (err) goto err_copy; - put_qp_read(qp); - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->rule_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); + rdma_alloc_commit_uobject(uobj, attrs); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); -destroy_flow: - ib_destroy_flow(flow_id); + if (!qp->device->destroy_flow(flow_id)) + atomic_dec(&qp->usecnt); err_free: + ib_uverbs_flow_resources_free(uflow_res); +err_free_flow_attr: kfree(flow_attr); err_put: - put_qp_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_uobj: - put_uobj_write(uobj); + uobj_alloc_abort(uobj, attrs); err_free_attr: if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); return err; } -int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_destroy_flow(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_flow cmd; - struct ib_flow *flow_id; - struct ib_uobject *uobj; int ret; - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; - - ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; if (cmd.comp_mask) return -EINVAL; - uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, - file->ucontext); - if (!uobj) - return -EINVAL; - flow_id = uobj->object; - - ret = ib_destroy_flow(flow_id); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); - - 
mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return ret; + return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, attrs); } -static int __uverbs_create_xsrq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, +static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) { @@ -3903,64 +3377,82 @@ struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_srq_init_attr attr; int ret; + struct ib_device *ib_dev; - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; + obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs, + &ib_dev); + if (IS_ERR(obj)) + return PTR_ERR(obj); - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); - down_write(&obj->uevent.uobject.mutex); + if (cmd->srq_type == IB_SRQT_TM) + attr.ext.tag_matching.max_num_tags = cmd->max_num_tags; if (cmd->srq_type == IB_SRQT_XRC) { - attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); - if (!attr.ext.xrc.xrcd) { + xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle, + attrs); + if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err; } + attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object; + if (!attr.ext.xrc.xrcd) { + ret = -EINVAL; + goto err_put_xrcd; + } + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); + } - attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); - if (!attr.ext.xrc.cq) { + if (ib_srq_has_cq(cmd->srq_type)) { + attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, + cmd->cq_handle, attrs); + if (!attr.ext.cq) { ret = -EINVAL; goto err_put_xrcd; } } - pd = idr_read_pd(cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_put_cq; } attr.event_handler = ib_uverbs_srq_event_handler; - attr.srq_context = file; + attr.srq_context = attrs->ufile; attr.srq_type = cmd->srq_type; attr.attr.max_wr = cmd->max_wr; attr.attr.max_sge = cmd->max_sge; attr.attr.srq_limit = cmd->srq_limit; - obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); - srq = pd->device->create_srq(pd, &attr, udata); - if (IS_ERR(srq)) { - ret = PTR_ERR(srq); + srq = rdma_zalloc_drv_obj(ib_dev, ib_srq); + if (!srq) { + ret = -ENOMEM; goto err_put; } srq->device = pd->device; srq->pd = pd; srq->srq_type = cmd->srq_type; - srq->uobject = &obj->uevent.uobject; + srq->uobject = obj; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; + ret = pd->device->create_srq(srq, &attr, udata); + if (ret) + goto err_free; + + if (ib_srq_has_cq(cmd->srq_type)) { + srq->ext.cq = attr.ext.cq; + atomic_inc(&attr.ext.cq->usecnt); + } + if (cmd->srq_type == IB_SRQT_XRC) { - srq->ext.xrc.cq = attr.ext.xrc.cq; srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; - atomic_inc(&attr.ext.xrc.cq->usecnt); atomic_inc(&attr.ext.xrc.xrcd->usecnt); } @@ -3968,9 +3460,7 @@ atomic_set(&srq->usecnt, 0); obj->uevent.uobject.object = srq; - ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); - if (ret) - goto err_destroy; + obj->uevent.uobject.user_handle = cmd->user_handle; memset(&resp, 0, sizeof resp); resp.srq_handle = obj->uevent.uobject.id; @@ -3979,69 +3469,57 @@ if (cmd->srq_type == IB_SRQT_XRC) resp.srqn = srq->ext.xrc.srq_num; - if (copy_to_user((void __user *) (unsigned long) cmd->response, - &resp, sizeof resp)) { - ret = -EFAULT; + ret = 
uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } - - if (cmd->srq_type == IB_SRQT_XRC) { - put_uobj_read(xrcd_uobj); - put_cq_read(attr.ext.xrc.cq); - } - put_pd_read(pd); - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; + if (cmd->srq_type == IB_SRQT_XRC) + uobj_put_read(xrcd_uobj); - up_write(&obj->uevent.uobject.mutex); + if (ib_srq_has_cq(cmd->srq_type)) + rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); + uobj_put_obj_read(pd); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); - -err_destroy: - ib_destroy_srq(srq); - + ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs)); + /* It was released in ib_destroy_srq_user */ + srq = NULL; +err_free: + kfree(srq); err_put: - put_pd_read(pd); + uobj_put_obj_read(pd); err_put_cq: - if (cmd->srq_type == IB_SRQT_XRC) - put_cq_read(attr.ext.xrc.cq); + if (ib_srq_has_cq(cmd->srq_type)) + rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_put_xrcd: if (cmd->srq_type == IB_SRQT_XRC) { atomic_dec(&obj->uxrcd->refcnt); - put_uobj_read(xrcd_uobj); + uobj_put_read(xrcd_uobj); } err: - put_uobj_write(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return ret; } -ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_srq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_srq cmd; struct ib_uverbs_create_xsrq xcmd; - struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; + memset(&xcmd, 0, sizeof(xcmd)); xcmd.response = cmd.response; xcmd.user_handle = cmd.user_handle; xcmd.srq_type = IB_SRQT_BASIC; @@ -4050,80 +3528,49 @@ xcmd.max_sge = cmd.max_sge; xcmd.srq_limit = cmd.srq_limit; - ib_uverbs_init_udata(&udata, buf + sizeof cmd, - u64_to_user_ptr(cmd.response + sizeof resp), - in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof resp); - - ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); - if (ret) - return ret; - - return in_len; + return __uverbs_create_xsrq(attrs, &xcmd, &attrs->driver_udata); } -ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, int out_len) +static int ib_uverbs_create_xsrq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_xsrq cmd; - struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof cmd, - u64_to_user_ptr(cmd.response + sizeof resp), - in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof resp); - - ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; - return in_len; + return __uverbs_create_xsrq(attrs, &cmd, &attrs->driver_udata); } -ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int 
ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_modify_srq cmd; - struct ib_udata udata; struct ib_srq *srq; struct ib_srq_attr attr; int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, - out_len); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - srq = idr_read_srq(cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); if (!srq) return -EINVAL; attr.max_wr = cmd.max_wr; attr.srq_limit = cmd.srq_limit; - ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); + ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, + &attrs->driver_udata); - put_srq_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); - return ret ? ret : in_len; + return ret; } -ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_query_srq cmd; struct ib_uverbs_query_srq_resp resp; @@ -4131,19 +3578,18 @@ struct ib_srq *srq; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - srq = idr_read_srq(cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); if (!srq) return -EINVAL; ret = ib_query_srq(srq, &attr); - put_srq_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) return ret; @@ -4154,85 +3600,49 @@ resp.max_sge = attr.max_sge; resp.srq_limit = attr.srq_limit; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_destroy_srq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_srq cmd; struct ib_uverbs_destroy_srq_resp resp; struct ib_uobject *uobj; - struct ib_srq *srq; struct ib_uevent_object *obj; - int ret = -EINVAL; - struct ib_usrq_object *us; - enum ib_srq_type srq_type; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext); - if (!uobj) - return -EINVAL; - srq = uobj->object; - obj = container_of(uobj, struct ib_uevent_object, uobject); - srq_type = srq->srq_type; - - ret = ib_destroy_srq(srq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + int ret; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; - if (srq_type == IB_SRQT_XRC) { - us = container_of(obj, struct ib_usrq_object, uevent); - atomic_dec(&us->uxrcd->refcnt); - } - - idr_remove_uobj(&ib_uverbs_srq_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_uevent(file, obj); + uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - memset(&resp, 0, sizeof resp); + obj = container_of(uobj, struct ib_uevent_object, uobject); + memset(&resp, 0, sizeof(resp)); resp.events_reported = obj->events_reported; - put_uobj(uobj); - - if (copy_to_user((void __user *) (unsigned long) 
cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; + uobj_put_destroy(uobj); - return ret ? ret : in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_query_device_resp resp = { {0} }; + struct ib_uverbs_ex_query_device_resp resp = {}; struct ib_uverbs_ex_query_device cmd; struct ib_device_attr attr = {0}; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; int err; - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; + ucontext = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + err = uverbs_request(attrs, &cmd, sizeof(cmd)); if (err) return err; @@ -4242,21 +3652,12 @@ if (cmd.reserved) return -EINVAL; - resp.response_length = offsetof(typeof(resp), odp_caps); - - if (ucore->outlen < resp.response_length) - return -ENOSPC; - - err = ib_dev->query_device(ib_dev, &attr, uhw); + err = ib_dev->query_device(ib_dev, &attr, &attrs->driver_udata); if (err) return err; - copy_query_dev_fields(file, ib_dev, &resp.base, &attr); - - if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) - goto end; + copy_query_dev_fields(ucontext, &resp.base, &attr); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING resp.odp_caps.general_caps = attr.odp_caps.general_caps; resp.odp_caps.per_transport_caps.rc_odp_caps = attr.odp_caps.per_transport_caps.rc_odp_caps; @@ -4264,44 +3665,431 @@ attr.odp_caps.per_transport_caps.uc_odp_caps; resp.odp_caps.per_transport_caps.ud_odp_caps = attr.odp_caps.per_transport_caps.ud_odp_caps; -#endif - resp.response_length += sizeof(resp.odp_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask)) - goto end; + resp.xrc_odp_caps = attr.odp_caps.per_transport_caps.xrc_odp_caps; resp.timestamp_mask = attr.timestamp_mask; - resp.response_length += sizeof(resp.timestamp_mask); - - if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock)) - goto end; - resp.hca_core_clock = attr.hca_core_clock; - resp.response_length += sizeof(resp.hca_core_clock); - - if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex)) - goto end; - resp.device_cap_flags_ex = attr.device_cap_flags; - resp.response_length += sizeof(resp.device_cap_flags_ex); - - if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps)) - goto end; - resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts; resp.rss_caps.max_rwq_indirection_tables = attr.rss_caps.max_rwq_indirection_tables; resp.rss_caps.max_rwq_indirection_table_size = attr.rss_caps.max_rwq_indirection_table_size; + resp.max_wq_type_rq = attr.max_wq_type_rq; + resp.raw_packet_caps = attr.raw_packet_caps; + resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size; + resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags; + resp.tm_caps.max_ops = attr.tm_caps.max_ops; + resp.tm_caps.max_sge = attr.tm_caps.max_sge; + resp.tm_caps.flags = attr.tm_caps.flags; + resp.cq_moderation_caps.max_cq_moderation_count = + attr.cq_caps.max_cq_moderation_count; + resp.cq_moderation_caps.max_cq_moderation_period = + attr.cq_caps.max_cq_moderation_period; + resp.max_dm_size = attr.max_dm_size; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + + return uverbs_response(attrs, &resp, sizeof(resp)); +} - 
resp.response_length += sizeof(resp.rss_caps); +static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_ex_modify_cq cmd; + struct ib_cq *cq; + int ret; - if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq)) - goto end; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - resp.max_wq_type_rq = attr.max_wq_type_rq; - resp.response_length += sizeof(resp.max_wq_type_rq); -end: - err = ib_copy_to_udata(ucore, &resp, resp.response_length); - return err; + if (!cmd.attr_mask || cmd.reserved) + return -EINVAL; + + if (cmd.attr_mask > IB_CQ_MODERATE) + return -EOPNOTSUPP; + + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); + if (!cq) + return -EINVAL; + + ret = ib_modify_cq(cq, cmd.attr.cq_count, cmd.attr.cq_period); + + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); + return ret; } + +/* + * Describe the input structs for write(). Some write methods have an input + * only struct, most have an input and output. If the struct has an output then + * the 'response' u64 must be the first field in the request structure. + * + * If udata is present then both the request and response structs have a + * trailing driver_data flex array. In this case the size of the base struct + * cannot be changed. + */ +#define UAPI_DEF_WRITE_IO(req, resp) \ + .write.has_resp = 1 + \ + BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \ + BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) != \ + sizeof(u64)), \ + .write.req_size = sizeof(req), .write.resp_size = sizeof(resp) + +#define UAPI_DEF_WRITE_I(req) .write.req_size = sizeof(req) + +#define UAPI_DEF_WRITE_UDATA_IO(req, resp) \ + UAPI_DEF_WRITE_IO(req, resp), \ + .write.has_udata = \ + 1 + \ + BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \ + sizeof(req)) + \ + BUILD_BUG_ON_ZERO(offsetof(resp, driver_data) != \ + sizeof(resp)) + +#define UAPI_DEF_WRITE_UDATA_I(req) \ + UAPI_DEF_WRITE_I(req), \ + .write.has_udata = \ + 1 + BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \ + sizeof(req)) + +/* + * The _EX versions are for use with WRITE_EX and allow the last struct member + * to be specified. Buffers that do not include that member will be rejected. 
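+ * For example, the IB_USER_VERBS_EX_CMD_CREATE_CQ entry below pairs the
+ * handler with UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_cq,
+ * reserved, struct ib_uverbs_ex_create_cq_resp, response_length):
+ * requests that stop short of 'reserved' are rejected, and the handler
+ * trims its response with uverbs_response_length() to what userspace
+ * can hold.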
+ */ +#define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member) \ + .write.has_resp = 1, \ + .write.req_size = offsetofend(req, req_last_member), \ + .write.resp_size = offsetofend(resp, resp_last_member) + +#define UAPI_DEF_WRITE_I_EX(req, req_last_member) \ + .write.req_size = offsetofend(req, req_last_member) + +const struct uapi_definition uverbs_def_write_intf[] = { + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_AH, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_AH, + ib_uverbs_create_ah, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_ah, + struct ib_uverbs_create_ah_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_ah)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DESTROY_AH, + ib_uverbs_destroy_ah, + UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah), + UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_COMP_CHANNEL, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, + ib_uverbs_create_comp_channel, + UAPI_DEF_WRITE_IO( + struct ib_uverbs_create_comp_channel, + struct ib_uverbs_create_comp_channel_resp))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_CQ, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_CQ, + ib_uverbs_create_cq, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_cq, + struct ib_uverbs_create_cq_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_cq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DESTROY_CQ, + ib_uverbs_destroy_cq, + UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_cq, + struct ib_uverbs_destroy_cq_resp), + UAPI_DEF_METHOD_NEEDS_FN(destroy_cq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_POLL_CQ, + ib_uverbs_poll_cq, + UAPI_DEF_WRITE_IO(struct ib_uverbs_poll_cq, + struct ib_uverbs_poll_cq_resp), + UAPI_DEF_METHOD_NEEDS_FN(poll_cq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_REQ_NOTIFY_CQ, + ib_uverbs_req_notify_cq, + UAPI_DEF_WRITE_I(struct ib_uverbs_req_notify_cq), + UAPI_DEF_METHOD_NEEDS_FN(req_notify_cq)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_RESIZE_CQ, + ib_uverbs_resize_cq, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_resize_cq, + struct ib_uverbs_resize_cq_resp), + UAPI_DEF_METHOD_NEEDS_FN(resize_cq)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_CQ, + ib_uverbs_ex_create_cq, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_cq, + reserved, + struct ib_uverbs_ex_create_cq_resp, + response_length), + UAPI_DEF_METHOD_NEEDS_FN(create_cq)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_MODIFY_CQ, + ib_uverbs_ex_modify_cq, + UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq), + UAPI_DEF_METHOD_NEEDS_FN(create_cq))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_DEVICE, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_GET_CONTEXT, + ib_uverbs_get_context, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_get_context, + struct ib_uverbs_get_context_resp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_QUERY_DEVICE, + ib_uverbs_query_device, + UAPI_DEF_WRITE_IO(struct ib_uverbs_query_device, + struct ib_uverbs_query_device_resp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_QUERY_PORT, + ib_uverbs_query_port, + UAPI_DEF_WRITE_IO(struct ib_uverbs_query_port, + struct ib_uverbs_query_port_resp), + UAPI_DEF_METHOD_NEEDS_FN(query_port)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_QUERY_DEVICE, + ib_uverbs_ex_query_device, + UAPI_DEF_WRITE_IO_EX( + struct ib_uverbs_ex_query_device, + reserved, + struct ib_uverbs_ex_query_device_resp, + response_length), + UAPI_DEF_METHOD_NEEDS_FN(query_device)), + UAPI_DEF_OBJ_NEEDS_FN(alloc_ucontext), + UAPI_DEF_OBJ_NEEDS_FN(dealloc_ucontext)), + + DECLARE_UVERBS_OBJECT( + 
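+		/* Flow steering is exposed only via the extended commands. */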
UVERBS_OBJECT_FLOW, + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_FLOW, + ib_uverbs_ex_create_flow, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_create_flow, + flow_attr, + struct ib_uverbs_create_flow_resp, + flow_handle), + UAPI_DEF_METHOD_NEEDS_FN(create_flow)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_DESTROY_FLOW, + ib_uverbs_ex_destroy_flow, + UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_flow), + UAPI_DEF_METHOD_NEEDS_FN(destroy_flow))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_MR, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_DEREG_MR, + ib_uverbs_dereg_mr, + UAPI_DEF_WRITE_I(struct ib_uverbs_dereg_mr), + UAPI_DEF_METHOD_NEEDS_FN(dereg_mr)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_REG_MR, + ib_uverbs_reg_mr, + UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_reg_mr, + struct ib_uverbs_reg_mr_resp), + UAPI_DEF_METHOD_NEEDS_FN(reg_user_mr)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_REREG_MR, + ib_uverbs_rereg_mr, + UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr, + struct ib_uverbs_rereg_mr_resp), + UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_MW, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_ALLOC_MW, + ib_uverbs_alloc_mw, + UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_mw, + struct ib_uverbs_alloc_mw_resp), + UAPI_DEF_METHOD_NEEDS_FN(alloc_mw)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DEALLOC_MW, + ib_uverbs_dealloc_mw, + UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_mw), + UAPI_DEF_METHOD_NEEDS_FN(dealloc_mw))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_PD, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_ALLOC_PD, + ib_uverbs_alloc_pd, + UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_pd, + struct ib_uverbs_alloc_pd_resp), + UAPI_DEF_METHOD_NEEDS_FN(alloc_pd)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DEALLOC_PD, + ib_uverbs_dealloc_pd, + UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_pd), + UAPI_DEF_METHOD_NEEDS_FN(dealloc_pd))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_QP, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_ATTACH_MCAST, + ib_uverbs_attach_mcast, + UAPI_DEF_WRITE_I(struct ib_uverbs_attach_mcast), + UAPI_DEF_METHOD_NEEDS_FN(attach_mcast), + UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_QP, + ib_uverbs_create_qp, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_qp, + struct ib_uverbs_create_qp_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_qp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DESTROY_QP, + ib_uverbs_destroy_qp, + UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_qp, + struct ib_uverbs_destroy_qp_resp), + UAPI_DEF_METHOD_NEEDS_FN(destroy_qp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DETACH_MCAST, + ib_uverbs_detach_mcast, + UAPI_DEF_WRITE_I(struct ib_uverbs_detach_mcast), + UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_MODIFY_QP, + ib_uverbs_modify_qp, + UAPI_DEF_WRITE_I(struct ib_uverbs_modify_qp), + UAPI_DEF_METHOD_NEEDS_FN(modify_qp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_POST_RECV, + ib_uverbs_post_recv, + UAPI_DEF_WRITE_IO(struct ib_uverbs_post_recv, + struct ib_uverbs_post_recv_resp), + UAPI_DEF_METHOD_NEEDS_FN(post_recv)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_POST_SEND, + ib_uverbs_post_send, + UAPI_DEF_WRITE_IO(struct ib_uverbs_post_send, + struct ib_uverbs_post_send_resp), + UAPI_DEF_METHOD_NEEDS_FN(post_send)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_QUERY_QP, + ib_uverbs_query_qp, + UAPI_DEF_WRITE_IO(struct ib_uverbs_query_qp, + struct ib_uverbs_query_qp_resp), + UAPI_DEF_METHOD_NEEDS_FN(query_qp)), + 
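+		/*
+		 * The extended create/modify QP commands below arrive via
+		 * WRITE_EX, so their sizes are described with
+		 * UAPI_DEF_WRITE_IO_EX() up to the named last struct member
+		 * rather than by the full struct size.
+		 */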
DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_QP, + ib_uverbs_ex_create_qp, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_qp, + comp_mask, + struct ib_uverbs_ex_create_qp_resp, + response_length), + UAPI_DEF_METHOD_NEEDS_FN(create_qp)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_MODIFY_QP, + ib_uverbs_ex_modify_qp, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_modify_qp, + base, + struct ib_uverbs_ex_modify_qp_resp, + response_length), + UAPI_DEF_METHOD_NEEDS_FN(modify_qp))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_RWQ_IND_TBL, + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, + ib_uverbs_ex_create_rwq_ind_table, + UAPI_DEF_WRITE_IO_EX( + struct ib_uverbs_ex_create_rwq_ind_table, + log_ind_tbl_size, + struct ib_uverbs_ex_create_rwq_ind_table_resp, + ind_tbl_num), + UAPI_DEF_METHOD_NEEDS_FN(create_rwq_ind_table)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL, + ib_uverbs_ex_destroy_rwq_ind_table, + UAPI_DEF_WRITE_I( + struct ib_uverbs_ex_destroy_rwq_ind_table), + UAPI_DEF_METHOD_NEEDS_FN(destroy_rwq_ind_table))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_WQ, + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_WQ, + ib_uverbs_ex_create_wq, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_wq, + max_sge, + struct ib_uverbs_ex_create_wq_resp, + wqn), + UAPI_DEF_METHOD_NEEDS_FN(create_wq)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_DESTROY_WQ, + ib_uverbs_ex_destroy_wq, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_destroy_wq, + wq_handle, + struct ib_uverbs_ex_destroy_wq_resp, + reserved), + UAPI_DEF_METHOD_NEEDS_FN(destroy_wq)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_MODIFY_WQ, + ib_uverbs_ex_modify_wq, + UAPI_DEF_WRITE_I_EX(struct ib_uverbs_ex_modify_wq, + curr_wq_state), + UAPI_DEF_METHOD_NEEDS_FN(modify_wq))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_SRQ, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_SRQ, + ib_uverbs_create_srq, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_srq, + struct ib_uverbs_create_srq_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_srq)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_XSRQ, + ib_uverbs_create_xsrq, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_xsrq, + struct ib_uverbs_create_srq_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_srq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DESTROY_SRQ, + ib_uverbs_destroy_srq, + UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_srq, + struct ib_uverbs_destroy_srq_resp), + UAPI_DEF_METHOD_NEEDS_FN(destroy_srq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_MODIFY_SRQ, + ib_uverbs_modify_srq, + UAPI_DEF_WRITE_UDATA_I(struct ib_uverbs_modify_srq), + UAPI_DEF_METHOD_NEEDS_FN(modify_srq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_POST_SRQ_RECV, + ib_uverbs_post_srq_recv, + UAPI_DEF_WRITE_IO(struct ib_uverbs_post_srq_recv, + struct ib_uverbs_post_srq_recv_resp), + UAPI_DEF_METHOD_NEEDS_FN(post_srq_recv)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_QUERY_SRQ, + ib_uverbs_query_srq, + UAPI_DEF_WRITE_IO(struct ib_uverbs_query_srq, + struct ib_uverbs_query_srq_resp), + UAPI_DEF_METHOD_NEEDS_FN(query_srq))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_XRCD, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_CLOSE_XRCD, + ib_uverbs_close_xrcd, + UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd), + UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP, + ib_uverbs_open_qp, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_open_qp, + struct ib_uverbs_create_qp_resp)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_XRCD, + 
ib_uverbs_open_xrcd, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_open_xrcd, + struct ib_uverbs_open_xrcd_resp), + UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))), + + {}, +}; diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_ioctl.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_ioctl.c new file mode 100644 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_ioctl.c @@ -0,0 +1,767 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include +#include +#include "rdma_core.h" +#include "uverbs.h" + +struct bundle_alloc_head { + struct bundle_alloc_head *next; + uint8_t data[0]; +}; + +struct bundle_priv { + /* Must be first */ + struct bundle_alloc_head alloc_head; + struct bundle_alloc_head *allocated_mem; + size_t internal_avail; + size_t internal_used; + + struct radix_tree_root *radix; + const struct uverbs_api_ioctl_method *method_elm; + u32 method_key; + + struct ib_uverbs_attr __user *user_attrs; + struct ib_uverbs_attr *uattrs; + + DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); + DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN); + + /* + * Must be last. bundle ends in a flex array which overlaps + * internal_buffer. + */ + struct uverbs_attr_bundle bundle; + u64 internal_buffer[32]; +}; + +/* + * Each method has an absolute minimum amount of memory it needs to allocate; + * precompute that amount and determine if the onstack memory can be used or + * if allocation is needed. + */ +void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, + unsigned int num_attrs) +{ + struct bundle_priv *pbundle; + size_t bundle_size = + offsetof(struct bundle_priv, internal_buffer) + + sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len + + sizeof(*pbundle->uattrs) * num_attrs; + + method_elm->use_stack = bundle_size <= sizeof(*pbundle); + method_elm->bundle_size = + ALIGN(bundle_size + 256, sizeof(*pbundle->internal_buffer)); + + /* Do not want order-2 allocations for this.
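+ * (When a method's bundle cannot live on the stack it is + * kmalloc'ed for every ioctl, so keeping bundle_size within one + * page keeps that per-call allocation cheap.)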
*/ + WARN_ON_ONCE(method_elm->bundle_size > PAGE_SIZE); +} + +/** + * uverbs_alloc() - Quickly allocate memory for use with a bundle + * @bundle: The bundle + * @size: Number of bytes to allocate + * @flags: Allocator flags + * + * The bundle allocator is intended for allocations that are connected with + * processing the system call related to the bundle. The allocated memory is + * always freed once the system call completes, and cannot be freed any other + * way. + * + * This tries to use a small pool of pre-allocated memory for performance. + */ +__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size, + gfp_t flags) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + size_t new_used; + void *res; + + new_used = size + pbundle->internal_used; + if (new_used < size) + return ERR_PTR(-EOVERFLOW); + + if (new_used > pbundle->internal_avail) { + struct bundle_alloc_head *buf; + + buf = kvmalloc(struct_size(buf, data, size), flags); + if (!buf) + return ERR_PTR(-ENOMEM); + buf->next = pbundle->allocated_mem; + pbundle->allocated_mem = buf; + return buf->data; + } + + res = (u8 *)pbundle->internal_buffer + pbundle->internal_used; + pbundle->internal_used = + ALIGN(new_used, sizeof(*pbundle->internal_buffer)); + if (flags & __GFP_ZERO) + memset(res, 0, size); + return res; +} +EXPORT_SYMBOL(_uverbs_alloc); + +static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, + u16 len) +{ + if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data)) + return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data + len), + uattr->len - len); + + return !memchr_inv((const u8 *)&uattr->data + len, + 0, uattr->len - len); +} + +static int uverbs_set_output(const struct uverbs_attr_bundle *bundle, + const struct uverbs_attr *attr) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + u16 flags; + + flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags | + UVERBS_ATTR_F_VALID_OUTPUT; + if (put_user(flags, + &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags)) + return -EFAULT; + return 0; +} + +static int uverbs_process_idrs_array(struct bundle_priv *pbundle, + const struct uverbs_api_attr *attr_uapi, + struct uverbs_objs_arr_attr *attr, + struct ib_uverbs_attr *uattr, + u32 attr_bkey) +{ + const struct uverbs_attr_spec *spec = &attr_uapi->spec; + size_t array_len; + u32 *idr_vals; + int ret = 0; + size_t i; + + if (uattr->attr_data.reserved) + return -EINVAL; + + if (uattr->len % sizeof(u32)) + return -EINVAL; + + array_len = uattr->len / sizeof(u32); + if (array_len < spec->u2.objs_arr.min_len || + array_len > spec->u2.objs_arr.max_len) + return -EINVAL; + + attr->uobjects = + uverbs_alloc(&pbundle->bundle, + array_size(array_len, sizeof(*attr->uobjects))); + if (IS_ERR(attr->uobjects)) + return PTR_ERR(attr->uobjects); + + /* + * Since idr is 4B and *uobjects is >= 4B, we can use attr->uobjects + * to store idrs array and avoid additional memory allocation. The + * idrs array is offset to the end of the uobjects array so we will be + * able to read idr and replace with a pointer. 
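+ * The store to uobjects[i] can only clobber ids that were already + * consumed, since idr_vals[i] is read before uobjects[i] is written; + * e.g. with two 8-byte pointers the ids occupy the second pointer + * slot and both are read before that slot is stored.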
+ */ + idr_vals = (u32 *)(attr->uobjects + array_len) - array_len; + + if (uattr->len > sizeof(uattr->data)) { + ret = copy_from_user(idr_vals, u64_to_user_ptr(uattr->data), + uattr->len); + if (ret) + return -EFAULT; + } else { + memcpy(idr_vals, &uattr->data, uattr->len); + } + + for (i = 0; i != array_len; i++) { + attr->uobjects[i] = uverbs_get_uobject_from_file( + spec->u2.objs_arr.obj_type, spec->u2.objs_arr.access, + idr_vals[i], &pbundle->bundle); + if (IS_ERR(attr->uobjects[i])) { + ret = PTR_ERR(attr->uobjects[i]); + break; + } + } + + attr->len = i; + __set_bit(attr_bkey, pbundle->spec_finalize); + return ret; +} + +static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, + struct uverbs_objs_arr_attr *attr, + bool commit, + struct uverbs_attr_bundle *attrs) +{ + const struct uverbs_attr_spec *spec = &attr_uapi->spec; + size_t i; + + for (i = 0; i != attr->len; i++) + uverbs_finalize_object(attr->uobjects[i], + spec->u2.objs_arr.access, commit, attrs); +} + +static int uverbs_process_attr(struct bundle_priv *pbundle, + const struct uverbs_api_attr *attr_uapi, + struct ib_uverbs_attr *uattr, u32 attr_bkey) +{ + const struct uverbs_attr_spec *spec = &attr_uapi->spec; + struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey]; + const struct uverbs_attr_spec *val_spec = spec; + struct uverbs_obj_attr *o_attr; + + switch (spec->type) { + case UVERBS_ATTR_TYPE_ENUM_IN: + if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems) + return -EOPNOTSUPP; + + if (uattr->attr_data.enum_data.reserved) + return -EINVAL; + + val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id]; + + /* Currently we only support PTR_IN based enums */ + if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN) + return -EOPNOTSUPP; + + e->ptr_attr.enum_id = uattr->attr_data.enum_data.elem_id; + /* fall through */ + case UVERBS_ATTR_TYPE_PTR_IN: + /* Ensure that any data provided by userspace beyond the known + * struct is zero. Userspace that knows how to use some future + * longer struct will fail here if used with an old kernel and + * non-zero content, making ABI compat/discovery simpler. + */ + if (uattr->len > val_spec->u.ptr.len && + val_spec->zero_trailing && + !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len)) + return -EOPNOTSUPP; + + /* fall through */ + case UVERBS_ATTR_TYPE_PTR_OUT: + if (uattr->len < val_spec->u.ptr.min_len || + (!val_spec->zero_trailing && + uattr->len > val_spec->u.ptr.len)) + return -EINVAL; + + if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN && + uattr->attr_data.reserved) + return -EINVAL; + + e->ptr_attr.uattr_idx = uattr - pbundle->uattrs; + e->ptr_attr.len = uattr->len; + + if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) { + void *p; + + p = uverbs_alloc(&pbundle->bundle, uattr->len); + if (IS_ERR(p)) + return PTR_ERR(p); + + e->ptr_attr.ptr = p; + + if (copy_from_user(p, u64_to_user_ptr(uattr->data), + uattr->len)) + return -EFAULT; + } else { + e->ptr_attr.data = uattr->data; + } + break; + + case UVERBS_ATTR_TYPE_IDR: + case UVERBS_ATTR_TYPE_FD: + if (uattr->attr_data.reserved) + return -EINVAL; + + if (uattr->len != 0) + return -EINVAL; + + o_attr = &e->obj_attr; + o_attr->attr_elm = attr_uapi; + + /* + * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and + * s64 for UVERBS_ATTR_TYPE_FD. 
We can cast the u64 to s64 + * here without caring about truncation as we know that the + * IDR implementation today rejects negative IDs + */ + o_attr->uobject = uverbs_get_uobject_from_file( + spec->u.obj.obj_type, spec->u.obj.access, + uattr->data_s64, &pbundle->bundle); + if (IS_ERR(o_attr->uobject)) + return PTR_ERR(o_attr->uobject); + __set_bit(attr_bkey, pbundle->uobj_finalize); + + if (spec->u.obj.access == UVERBS_ACCESS_NEW) { + unsigned int uattr_idx = uattr - pbundle->uattrs; + s64 id = o_attr->uobject->id; + + /* Copy the allocated id to the user-space */ + if (put_user(id, &pbundle->user_attrs[uattr_idx].data)) + return -EFAULT; + } + + break; + + case UVERBS_ATTR_TYPE_IDRS_ARRAY: + return uverbs_process_idrs_array(pbundle, attr_uapi, + &e->objs_arr_attr, uattr, + attr_bkey); + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static void *uapi_get_attr_for_method(struct bundle_priv *pbundle, + u32 attr_key) +{ + return radix_tree_lookup(pbundle->radix, + pbundle->method_key | attr_key); +} + +static int uverbs_set_attr(struct bundle_priv *pbundle, + struct ib_uverbs_attr *uattr) +{ + u32 attr_key = uapi_key_attr(uattr->attr_id); + u32 attr_bkey = uapi_bkey_attr(attr_key); + const struct uverbs_api_attr *attr; + void *slot; + int ret; + + slot = uapi_get_attr_for_method(pbundle, attr_key); + if (!slot) { + /* + * Kernel does not support the attribute but user-space says it + * is mandatory + */ + if (uattr->flags & UVERBS_ATTR_F_MANDATORY) + return -EPROTONOSUPPORT; + return 0; + } + attr = slot; + + /* Reject duplicate attributes from user-space */ + if (test_bit(attr_bkey, pbundle->bundle.attr_present)) + return -EINVAL; + + ret = uverbs_process_attr(pbundle, attr, uattr, attr_bkey); + if (ret) + return ret; + + __set_bit(attr_bkey, pbundle->bundle.attr_present); + + return 0; +} + +static int ib_uverbs_run_method(struct bundle_priv *pbundle, + unsigned int num_attrs) +{ + int (*handler)(struct uverbs_attr_bundle *attrs); + size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs); + unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey; + unsigned int i; + int ret; + + /* See uverbs_disassociate_api() */ + handler = srcu_dereference( + pbundle->method_elm->handler, + &pbundle->bundle.ufile->device->disassociate_srcu); + if (!handler) + return -EIO; + + pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size); + if (IS_ERR(pbundle->uattrs)) + return PTR_ERR(pbundle->uattrs); + if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size)) + return -EFAULT; + + for (i = 0; i != num_attrs; i++) { + ret = uverbs_set_attr(pbundle, &pbundle->uattrs[i]); + if (unlikely(ret)) + return ret; + } + + /* User space did not provide all the mandatory attributes */ + if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory, + pbundle->bundle.attr_present, + pbundle->method_elm->key_bitmap_len))) + return -EINVAL; + + if (pbundle->method_elm->has_udata) + uverbs_fill_udata(&pbundle->bundle, + &pbundle->bundle.driver_udata, + UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT); + else + pbundle->bundle.driver_udata = (struct ib_udata){}; + + if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) { + struct uverbs_obj_attr *destroy_attr = + &pbundle->bundle.attrs[destroy_bkey].obj_attr; + + ret = uobj_destroy(destroy_attr->uobject, &pbundle->bundle); + if (ret) + return ret; + __clear_bit(destroy_bkey, pbundle->uobj_finalize); + + ret = handler(&pbundle->bundle); + uobj_put_destroy(destroy_attr->uobject); + } else { + ret = handler(&pbundle->bundle); + } + + /* + * 
Until the drivers are revised to use the bundle directly we have to + * assume that the driver wrote to its UHW_OUT and flag userspace + * appropriately. + */ + if (!ret && pbundle->method_elm->has_udata) { + const struct uverbs_attr *attr = + uverbs_attr_get(&pbundle->bundle, UVERBS_ATTR_UHW_OUT); + + if (!IS_ERR(attr)) + ret = uverbs_set_output(&pbundle->bundle, attr); + } + + /* + * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can + * not invoke the method because the request is not supported. No + * other cases should return this code. + */ + if (WARN_ON_ONCE(ret == -EPROTONOSUPPORT)) + return -EINVAL; + + return ret; +} + +static void bundle_destroy(struct bundle_priv *pbundle, bool commit) +{ + unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len; + struct bundle_alloc_head *memblock; + unsigned int i; + + /* fast path for simple uobjects */ + i = -1; + while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, + i + 1)) < key_bitmap_len) { + struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; + + uverbs_finalize_object( + attr->obj_attr.uobject, + attr->obj_attr.attr_elm->spec.u.obj.access, commit, + &pbundle->bundle); + } + + i = -1; + while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len, + i + 1)) < key_bitmap_len) { + struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; + const struct uverbs_api_attr *attr_uapi; + void *slot; + + slot = uapi_get_attr_for_method( + pbundle, + pbundle->method_key | uapi_bkey_to_key_attr(i)); + if (WARN_ON(!slot)) + continue; + + attr_uapi = slot; + + if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) { + uverbs_free_idrs_array(attr_uapi, &attr->objs_arr_attr, + commit, &pbundle->bundle); + } + } + + for (memblock = pbundle->allocated_mem; memblock;) { + struct bundle_alloc_head *tmp = memblock; + + memblock = memblock->next; + kvfree(tmp); + } +} + +static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, + struct ib_uverbs_ioctl_hdr *hdr, + struct ib_uverbs_attr __user *user_attrs) +{ + const struct uverbs_api_ioctl_method *method_elm; + struct uverbs_api *uapi = ufile->device->uapi; + struct bundle_priv *pbundle; + struct bundle_priv onstack; + void *slot; + int ret; + + if (unlikely(hdr->driver_id != uapi->driver_id)) + return -EINVAL; + + slot = radix_tree_lookup( + &uapi->radix, + uapi_key_obj(hdr->object_id) | + uapi_key_ioctl_method(hdr->method_id)); + if (unlikely(!slot)) + return -EPROTONOSUPPORT; + method_elm = slot; + + if (!method_elm->use_stack) { + pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL); + if (!pbundle) + return -ENOMEM; + pbundle->internal_avail = + method_elm->bundle_size - + offsetof(struct bundle_priv, internal_buffer); + pbundle->alloc_head.next = NULL; + pbundle->allocated_mem = &pbundle->alloc_head; + } else { + pbundle = &onstack; + pbundle->internal_avail = sizeof(pbundle->internal_buffer); + pbundle->allocated_mem = NULL; + } + + /* Space for the pbundle->bundle.attrs flex array */ + pbundle->method_elm = method_elm; + pbundle->method_key = + uapi_key_obj(hdr->object_id) | + uapi_key_ioctl_method(hdr->method_id); + pbundle->bundle.ufile = ufile; + pbundle->bundle.context = NULL; /* only valid if bundle has uobject */ + pbundle->radix = &uapi->radix; + pbundle->user_attrs = user_attrs; + + pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len * + sizeof(*pbundle->bundle.attrs), + sizeof(*pbundle->internal_buffer)); + memset(pbundle->bundle.attr_present, 0, + sizeof(pbundle->bundle.attr_present)); + memset(pbundle->uobj_finalize, 0, 
sizeof(pbundle->uobj_finalize)); + memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize)); + + ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); + bundle_destroy(pbundle, ret == 0); + return ret; +} + +long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_ioctl_hdr __user *user_hdr = + (struct ib_uverbs_ioctl_hdr __user *)arg; + struct ib_uverbs_ioctl_hdr hdr; + int srcu_key; + int err; + + if (unlikely(cmd != RDMA_VERBS_IOCTL)) + return -ENOIOCTLCMD; + + err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); + if (err) + return -EFAULT; + + if (hdr.length > PAGE_SIZE || + hdr.length != struct_size(&hdr, attrs, hdr.num_attrs)) + return -EINVAL; + + if (hdr.reserved1 || hdr.reserved2) + return -EPROTONOSUPPORT; + + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); + err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs); + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); + return err; +} + +int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + const struct uverbs_attr *attr; + u64 flags; + + attr = uverbs_attr_get(attrs_bundle, idx); + /* Missing attribute means 0 flags */ + if (IS_ERR(attr)) { + *to = 0; + return 0; + } + + /* + * New userspace code should use 8 bytes to pass flags, but we + * transparently support old userspaces that were using 4 bytes as + * well. + */ + if (attr->ptr_attr.len == 8) + flags = attr->ptr_attr.data; + else if (attr->ptr_attr.len == 4) + flags = *(const u32 *)&attr->ptr_attr.data; + else + return -EINVAL; + + if (flags & ~allowed_bits) + return -EINVAL; + + *to = flags; + return 0; +} +EXPORT_SYMBOL(uverbs_get_flags64); + +int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + u64 flags; + int ret; + + ret = uverbs_get_flags64(&flags, attrs_bundle, idx, allowed_bits); + if (ret) + return ret; + + if (flags > U32_MAX) + return -EINVAL; + *to = flags; + + return 0; +} +EXPORT_SYMBOL(uverbs_get_flags32); + +/* + * Fill a ib_udata struct (core or uhw) using the given attribute IDs. + * This is primarily used to convert the UVERBS_ATTR_UHW() into the + * ib_udata format used by the drivers. 
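+ * Either attribute may be absent; the corresponding buffer is then + * left NULL with a zero length, so drivers can keep testing + * udata->inlen and udata->outlen as usual.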
+ */ +void uverbs_fill_udata(struct uverbs_attr_bundle *bundle, + struct ib_udata *udata, unsigned int attr_in, + unsigned int attr_out) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + const struct uverbs_attr *in = + uverbs_attr_get(&pbundle->bundle, attr_in); + const struct uverbs_attr *out = + uverbs_attr_get(&pbundle->bundle, attr_out); + + if (!IS_ERR(in)) { + udata->inlen = in->ptr_attr.len; + if (uverbs_attr_ptr_is_inline(in)) + udata->inbuf = (void *) + &pbundle->user_attrs[in->ptr_attr.uattr_idx] + .data; + else + udata->inbuf = u64_to_user_ptr(in->ptr_attr.data); + } else { + udata->inbuf = NULL; + udata->inlen = 0; + } + + if (!IS_ERR(out)) { + udata->outbuf = u64_to_user_ptr(out->ptr_attr.data); + udata->outlen = out->ptr_attr.len; + } else { + udata->outbuf = NULL; + udata->outlen = 0; + } +} + +int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx, + const void *from, size_t size) +{ + const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + size_t min_size; + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + min_size = min_t(size_t, attr->ptr_attr.len, size); + if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size)) + return -EFAULT; + + return uverbs_set_output(bundle, attr); +} +EXPORT_SYMBOL(uverbs_copy_to); + + +/* + * This is only used if the caller has directly used copy_to_user to write the + * data. It signals to user space that the buffer is filled in. + */ +int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + return uverbs_set_output(bundle, attr); +} + +int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, s64 lower_bound, u64 upper_bound, + s64 *def_val) +{ + const struct uverbs_attr *attr; + + attr = uverbs_attr_get(attrs_bundle, idx); + if (IS_ERR(attr)) { + if ((PTR_ERR(attr) != -ENOENT) || !def_val) + return PTR_ERR(attr); + + *to = *def_val; + } else { + *to = attr->ptr_attr.data; + } + + if (*to < lower_bound || (*to > 0 && (u64)*to > upper_bound)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(_uverbs_get_const); + +int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, + size_t idx, const void *from, size_t size) +{ + const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + if (size < attr->ptr_attr.len) { + if (clear_user(u64_to_user_ptr(attr->ptr_attr.data + size), + attr->ptr_attr.len - size)) + return -EFAULT; + } + return uverbs_copy_to(bundle, idx, from, size); +} diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c --- a/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c +++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c @@ -49,12 +49,16 @@ #include #include #include +#include #include #include +#include #include "uverbs.h" +#include "core_priv.h" +#include "rdma_core.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); @@ -63,86 +67,17 @@ enum { IB_UVERBS_MAJOR = 231, IB_UVERBS_BASE_MINOR = 192, - IB_UVERBS_MAX_DEVICES = 32 + IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS, + IB_UVERBS_NUM_FIXED_MINOR = 32, + IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR, }; #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) +static dev_t dynamic_uverbs_dev; static struct class
*uverbs_class; -DEFINE_SPINLOCK(ib_uverbs_idr_lock); -DEFINE_IDR(ib_uverbs_pd_idr); -DEFINE_IDR(ib_uverbs_mr_idr); -DEFINE_IDR(ib_uverbs_mw_idr); -DEFINE_IDR(ib_uverbs_ah_idr); -DEFINE_IDR(ib_uverbs_cq_idr); -DEFINE_IDR(ib_uverbs_qp_idr); -DEFINE_IDR(ib_uverbs_srq_idr); -DEFINE_IDR(ib_uverbs_xrcd_idr); -DEFINE_IDR(ib_uverbs_rule_idr); -DEFINE_IDR(ib_uverbs_wq_idr); -DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr); - -static DEFINE_SPINLOCK(map_lock); -static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); - -static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) = { - [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, - [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, - [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, - [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, - [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, - [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, - [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr, - [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, - [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, - [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, - [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, - [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, - [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, - [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, - [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, - [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, - [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, - [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, - [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, - [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, - [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, - [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, - [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, - [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, - [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, - [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, - [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, - [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, - [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, - [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, - [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, - [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, - [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, - [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, - [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, -}; - -static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - struct ib_udata *ucore, - struct ib_udata *uhw) = { - [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, - [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, - [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, - [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, - [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, - [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, - [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, - [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, - [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, - [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, -}; - +static DEFINE_IDA(uverbs_ida); static void ib_uverbs_add_one(struct ib_device 
*device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); @@ -181,67 +116,56 @@ return ret; } -static void ib_uverbs_release_dev(struct kobject *kobj) +static void ib_uverbs_release_dev(struct device *device) { struct ib_uverbs_device *dev = - container_of(kobj, struct ib_uverbs_device, kobj); + container_of(device, struct ib_uverbs_device, dev); + uverbs_destroy_api(dev->uapi); cleanup_srcu_struct(&dev->disassociate_srcu); + mutex_destroy(&dev->lists_mutex); + mutex_destroy(&dev->xrcd_tree_mutex); kfree(dev); } -static struct kobj_type ib_uverbs_dev_ktype = { - .release = ib_uverbs_release_dev, -}; - -static void ib_uverbs_release_event_file(struct kref *ref) -{ - struct ib_uverbs_event_file *file = - container_of(ref, struct ib_uverbs_event_file, ref); - - kfree(file); -} - -void ib_uverbs_release_ucq(struct ib_uverbs_file *file, - struct ib_uverbs_event_file *ev_file, - struct ib_ucq_object *uobj) +void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file, + struct ib_ucq_object *uobj) { struct ib_uverbs_event *evt, *tmp; if (ev_file) { - spin_lock_irq(&ev_file->lock); + spin_lock_irq(&ev_file->ev_queue.lock); list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&ev_file->lock); + spin_unlock_irq(&ev_file->ev_queue.lock); - kref_put(&ev_file->ref, ib_uverbs_release_event_file); + uverbs_uobject_put(&ev_file->uobj); } - spin_lock_irq(&file->async_file->lock); - list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file->lock); + ib_uverbs_release_uevent(&uobj->uevent); } -void ib_uverbs_release_uevent(struct ib_uverbs_file *file, - struct ib_uevent_object *uobj) +void ib_uverbs_release_uevent(struct ib_uevent_object *uobj) { + struct ib_uverbs_async_event_file *async_file = + READ_ONCE(uobj->uobject.ufile->async_file); struct ib_uverbs_event *evt, *tmp; - spin_lock_irq(&file->async_file->lock); + if (!async_file) + return; + + spin_lock_irq(&async_file->ev_queue.lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file->lock); + spin_unlock_irq(&async_file->ev_queue.lock); } -static void ib_uverbs_detach_umcast(struct ib_qp *qp, - struct ib_uqp_object *uobj) +void ib_uverbs_detach_umcast(struct ib_qp *qp, + struct ib_uqp_object *uobj) { struct ib_uverbs_mcast_entry *mcast, *tmp; @@ -252,140 +176,20 @@ } } -static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, - struct ib_ucontext *context) -{ - struct ib_uobject *uobj, *tmp; - - context->closing = 1; - - list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { - struct ib_ah *ah = uobj->object; - - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); - ib_destroy_ah(ah); - kfree(uobj); - } - - /* Remove MWs before QPs, in order to support type 2A MWs. 
*/ - list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { - struct ib_mw *mw = uobj->object; - - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); - uverbs_dealloc_mw(mw); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { - struct ib_flow *flow_id = uobj->object; - - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); - ib_destroy_flow(flow_id); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { - struct ib_qp *qp = uobj->object; - struct ib_uqp_object *uqp = - container_of(uobj, struct ib_uqp_object, uevent.uobject); - - idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - if (qp == qp->real_qp) - ib_uverbs_detach_umcast(qp, uqp); - ib_destroy_qp(qp); - ib_uverbs_release_uevent(file, &uqp->uevent); - kfree(uqp); - } - - list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) { - struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; - struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; - - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); - ib_destroy_rwq_ind_table(rwq_ind_tbl); - kfree(ind_tbl); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { - struct ib_wq *wq = uobj->object; - struct ib_uwq_object *uwq = - container_of(uobj, struct ib_uwq_object, uevent.uobject); - - idr_remove_uobj(&ib_uverbs_wq_idr, uobj); - ib_destroy_wq(wq); - ib_uverbs_release_uevent(file, &uwq->uevent); - kfree(uwq); - } - - list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { - struct ib_srq *srq = uobj->object; - struct ib_uevent_object *uevent = - container_of(uobj, struct ib_uevent_object, uobject); - - idr_remove_uobj(&ib_uverbs_srq_idr, uobj); - ib_destroy_srq(srq); - ib_uverbs_release_uevent(file, uevent); - kfree(uevent); - } - - list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { - struct ib_cq *cq = uobj->object; - struct ib_uverbs_event_file *ev_file = cq->cq_context; - struct ib_ucq_object *ucq = - container_of(uobj, struct ib_ucq_object, uobject); - - idr_remove_uobj(&ib_uverbs_cq_idr, uobj); - ib_destroy_cq(cq); - ib_uverbs_release_ucq(file, ev_file, ucq); - kfree(ucq); - } - - list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { - struct ib_mr *mr = uobj->object; - - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - ib_dereg_mr(mr); - kfree(uobj); - } - - mutex_lock(&file->device->xrcd_tree_mutex); - list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { - struct ib_xrcd *xrcd = uobj->object; - struct ib_uxrcd_object *uxrcd = - container_of(uobj, struct ib_uxrcd_object, uobject); - - idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); - ib_uverbs_dealloc_xrcd(file->device, xrcd); - kfree(uxrcd); - } - mutex_unlock(&file->device->xrcd_tree_mutex); - - list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { - struct ib_pd *pd = uobj->object; - - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); - ib_dealloc_pd(pd); - kfree(uobj); - } - - put_pid(context->tgid); - - return context->device->dealloc_ucontext(context); -} - static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) { complete(&dev->comp); } -static void ib_uverbs_release_file(struct kref *ref) +void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); struct ib_device *ib_dev; int srcu_key; + release_ufile_idr_uobject(file); + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); @@ -396,61 +200,59 @@ if (atomic_dec_and_test(&file->device->refcount)) 
ib_uverbs_comp_dev(file->device); + if (file->async_file) + uverbs_uobject_put(&file->async_file->uobj); + put_device(&file->device->dev); + + mutex_destroy(&file->umap_lock); + mutex_destroy(&file->ucontext_lock); kfree(file); } -static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, - size_t count, loff_t *pos) +static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, + struct file *filp, char __user *buf, + size_t count, loff_t *pos, + size_t eventsz) { - struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *event; - int eventsz; int ret = 0; - spin_lock_irq(&file->lock); + spin_lock_irq(&ev_queue->lock); - while (list_empty(&file->event_list)) { - spin_unlock_irq(&file->lock); + while (list_empty(&ev_queue->event_list)) { + spin_unlock_irq(&ev_queue->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; - if (wait_event_interruptible(file->poll_wait, - (!list_empty(&file->event_list) || - /* The barriers built into wait_event_interruptible() - * and wake_up() guarentee this will see the null set - * without using RCU - */ - !file->uverbs_file->device->ib_dev))) + if (wait_event_interruptible(ev_queue->poll_wait, + (!list_empty(&ev_queue->event_list) || + ev_queue->is_closed))) return -ERESTARTSYS; + spin_lock_irq(&ev_queue->lock); + /* If device was disassociated and no event exists set an error */ - if (list_empty(&file->event_list) && - !file->uverbs_file->device->ib_dev) + if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) { + spin_unlock_irq(&ev_queue->lock); return -EIO; - - spin_lock_irq(&file->lock); + } } - event = list_entry(file->event_list.next, struct ib_uverbs_event, list); - - if (file->is_async) - eventsz = sizeof (struct ib_uverbs_async_event_desc); - else - eventsz = sizeof (struct ib_uverbs_comp_event_desc); + event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list); if (eventsz > count) { ret = -EINVAL; event = NULL; } else { - list_del(file->event_list.next); + list_del(ev_queue->event_list.next); if (event->counter) { ++(*event->counter); list_del(&event->obj_list); } } - spin_unlock_irq(&file->lock); + spin_unlock_irq(&ev_queue->lock); if (event) { if (copy_to_user(buf, event, eventsz)) @@ -464,483 +266,552 @@ return ret; } -static unsigned int ib_uverbs_event_poll(struct file *filp, +static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_async_event_file *file = filp->private_data; + + return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos, + sizeof(struct ib_uverbs_async_event_desc)); +} + +static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_completion_event_file *comp_ev_file = + filp->private_data; + + return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count, + pos, + sizeof(struct ib_uverbs_comp_event_desc)); +} + +static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue, + struct file *filp, struct poll_table_struct *wait) { - unsigned int pollflags = 0; - struct ib_uverbs_event_file *file = filp->private_data; + __poll_t pollflags = 0; - poll_wait(filp, &file->poll_wait, wait); + poll_wait(filp, &ev_queue->poll_wait, wait); - spin_lock_irq(&file->lock); - if (!list_empty(&file->event_list)) + spin_lock_irq(&ev_queue->lock); + if (!list_empty(&ev_queue->event_list)) pollflags = POLLIN | POLLRDNORM; - spin_unlock_irq(&file->lock); + spin_unlock_irq(&ev_queue->lock); return 
pollflags; } -static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) +static __poll_t ib_uverbs_async_event_poll(struct file *filp, + struct poll_table_struct *wait) { - struct ib_uverbs_event_file *file = filp->private_data; + struct ib_uverbs_async_event_file *file = filp->private_data; - return fasync_helper(fd, filp, on, &file->async_queue); + return ib_uverbs_event_poll(&file->ev_queue, filp, wait); } -static int ib_uverbs_event_close(struct inode *inode, struct file *filp) +static __poll_t ib_uverbs_comp_event_poll(struct file *filp, + struct poll_table_struct *wait) { - struct ib_uverbs_event_file *file = filp->private_data; - struct ib_uverbs_event *entry, *tmp; - int closed_already = 0; - - mutex_lock(&file->uverbs_file->device->lists_mutex); - spin_lock_irq(&file->lock); - closed_already = file->is_closed; - file->is_closed = 1; - list_for_each_entry_safe(entry, tmp, &file->event_list, list) { - if (entry->counter) - list_del(&entry->obj_list); - kfree(entry); - } - spin_unlock_irq(&file->lock); - if (!closed_already) { - list_del(&file->list); - if (file->is_async) - ib_unregister_event_handler(&file->uverbs_file-> - event_handler); - } - mutex_unlock(&file->uverbs_file->device->lists_mutex); + struct ib_uverbs_completion_event_file *comp_ev_file = + filp->private_data; - kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); - kref_put(&file->ref, ib_uverbs_release_event_file); + return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait); +} - return 0; +static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on) +{ + struct ib_uverbs_async_event_file *file = filp->private_data; + + return fasync_helper(fd, filp, on, &file->ev_queue.async_queue); } -static const struct file_operations uverbs_event_fops = { +static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) +{ + struct ib_uverbs_completion_event_file *comp_ev_file = + filp->private_data; + + return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue); +} + +const struct file_operations uverbs_event_fops = { + .owner = THIS_MODULE, + .read = ib_uverbs_comp_event_read, + .poll = ib_uverbs_comp_event_poll, + .release = uverbs_uobject_fd_release, + .fasync = ib_uverbs_comp_event_fasync, + .llseek = no_llseek, +}; + +const struct file_operations uverbs_async_event_fops = { .owner = THIS_MODULE, - .read = ib_uverbs_event_read, - .poll = ib_uverbs_event_poll, - .release = ib_uverbs_event_close, - .fasync = ib_uverbs_event_fasync, + .read = ib_uverbs_async_event_read, + .poll = ib_uverbs_async_event_poll, + .release = uverbs_uobject_fd_release, + .fasync = ib_uverbs_async_event_fasync, .llseek = no_llseek, }; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { - struct ib_uverbs_event_file *file = cq_context; + struct ib_uverbs_event_queue *ev_queue = cq_context; struct ib_ucq_object *uobj; struct ib_uverbs_event *entry; unsigned long flags; - if (!file) + if (!ev_queue) return; - spin_lock_irqsave(&file->lock, flags); - if (file->is_closed) { - spin_unlock_irqrestore(&file->lock, flags); + spin_lock_irqsave(&ev_queue->lock, flags); + if (ev_queue->is_closed) { + spin_unlock_irqrestore(&ev_queue->lock, flags); return; } - entry = kmalloc(sizeof *entry, GFP_ATOMIC); + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { - spin_unlock_irqrestore(&file->lock, flags); + spin_unlock_irqrestore(&ev_queue->lock, flags); return; } - uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + uobj = cq->uobject; - entry->desc.comp.cq_handle = 
cq->uobject->user_handle; + entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle; entry->counter = &uobj->comp_events_reported; - list_add_tail(&entry->list, &file->event_list); + list_add_tail(&entry->list, &ev_queue->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); - spin_unlock_irqrestore(&file->lock, flags); + spin_unlock_irqrestore(&ev_queue->lock, flags); - wake_up_interruptible(&file->poll_wait); - kill_fasync(&file->async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&ev_queue->poll_wait); + kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN); } -static void ib_uverbs_async_handler(struct ib_uverbs_file *file, - __u64 element, __u64 event, - struct list_head *obj_list, - u32 *counter) +static void +ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file, + __u64 element, __u64 event, struct list_head *obj_list, + u32 *counter) { struct ib_uverbs_event *entry; unsigned long flags; - spin_lock_irqsave(&file->async_file->lock, flags); - if (file->async_file->is_closed) { - spin_unlock_irqrestore(&file->async_file->lock, flags); + if (!async_file) + return; + + spin_lock_irqsave(&async_file->ev_queue.lock, flags); + if (async_file->ev_queue.is_closed) { + spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); return; } - entry = kmalloc(sizeof *entry, GFP_ATOMIC); + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { - spin_unlock_irqrestore(&file->async_file->lock, flags); + spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); return; } - entry->desc.async.element = element; + entry->desc.async.element = element; entry->desc.async.event_type = event; - entry->desc.async.reserved = 0; - entry->counter = counter; + entry->desc.async.reserved = 0; + entry->counter = counter; - list_add_tail(&entry->list, &file->async_file->event_list); + list_add_tail(&entry->list, &async_file->ev_queue.event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); - spin_unlock_irqrestore(&file->async_file->lock, flags); + spin_unlock_irqrestore(&async_file->ev_queue.lock, flags); - wake_up_interruptible(&file->async_file->poll_wait); - kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&async_file->ev_queue.poll_wait); + kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN); } -void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) +static void uverbs_uobj_event(struct ib_uevent_object *eobj, + struct ib_event *event) { - struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, - struct ib_ucq_object, uobject); + ib_uverbs_async_handler(READ_ONCE(eobj->uobject.ufile->async_file), + eobj->uobject.user_handle, event->event, + &eobj->event_list, &eobj->events_reported); +} - ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, - event->event, &uobj->async_list, - &uobj->async_events_reported); +void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) +{ + uverbs_uobj_event(&event->element.cq->uobject->uevent, event); } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_uevent_object *uobj; - /* for XRC target qp's, check that qp is live */ - if (!event->element.qp->uobject || !event->element.qp->uobject->live) + if (!event->element.qp->uobject) return; - uobj = container_of(event->element.qp->uobject, - struct ib_uevent_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, - event->event, &uobj->event_list, - &uobj->events_reported); + 
uverbs_uobj_event(&event->element.qp->uobject->uevent, event); } void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, - struct ib_uevent_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, - event->event, &uobj->event_list, - &uobj->events_reported); + uverbs_uobj_event(&event->element.wq->uobject->uevent, event); } void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_uevent_object *uobj; - - uobj = container_of(event->element.srq->uobject, - struct ib_uevent_object, uobject); - - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, - event->event, &uobj->event_list, - &uobj->events_reported); + uverbs_uobj_event(&event->element.srq->uobject->uevent, event); } -void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event) +static void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) { - struct ib_uverbs_file *file = - container_of(handler, struct ib_uverbs_file, event_handler); - - ib_uverbs_async_handler(file, event->element.port_num, event->event, - NULL, NULL); + ib_uverbs_async_handler( + container_of(handler, struct ib_uverbs_async_event_file, + event_handler), + event->element.port_num, event->event, NULL, NULL); } -void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) +void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue) { - kref_put(&file->async_file->ref, ib_uverbs_release_event_file); - file->async_file = NULL; + spin_lock_init(&ev_queue->lock); + INIT_LIST_HEAD(&ev_queue->event_list); + init_waitqueue_head(&ev_queue->poll_wait); + ev_queue->is_closed = 0; + ev_queue->async_queue = NULL; } -struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - struct ib_device *ib_dev, - int is_async) +void ib_uverbs_init_async_event_file( + struct ib_uverbs_async_event_file *async_file) { - struct ib_uverbs_event_file *ev_file; - struct file *filp; - int ret; - - ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL); - if (!ev_file) - return ERR_PTR(-ENOMEM); + struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile; + struct ib_device *ib_dev = async_file->uobj.context->device; - kref_init(&ev_file->ref); - spin_lock_init(&ev_file->lock); - INIT_LIST_HEAD(&ev_file->event_list); - init_waitqueue_head(&ev_file->poll_wait); - ev_file->uverbs_file = uverbs_file; - kref_get(&ev_file->uverbs_file->ref); - ev_file->async_queue = NULL; - ev_file->is_closed = 0; + ib_uverbs_init_event_queue(&async_file->ev_queue); - /* - * fops_get() can't fail here, because we're coming from a - * system call on a uverbs file, which will already have a - * module reference. 
- */ - filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops)); - if (IS_ERR(filp)) - goto err_put_refs; - filp->private_data = ev_file; - - mutex_lock(&uverbs_file->device->lists_mutex); - list_add_tail(&ev_file->list, - &uverbs_file->device->uverbs_events_file_list); - mutex_unlock(&uverbs_file->device->lists_mutex); - - if (is_async) { - WARN_ON(uverbs_file->async_file); - uverbs_file->async_file = ev_file; - kref_get(&uverbs_file->async_file->ref); - INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, - ib_dev, - ib_uverbs_event_handler); - ret = ib_register_event_handler(&uverbs_file->event_handler); - if (ret) - goto err_put_file; - - /* At that point async file stuff was fully set */ - ev_file->is_async = 1; + /* The first async_event_file becomes the default one for the file. */ + mutex_lock(&uverbs_file->ucontext_lock); + if (!uverbs_file->async_file) { + /* Pairs with the put in ib_uverbs_release_file */ + uverbs_uobject_get(&async_file->uobj); + atomic_store_rel_ptr((uintptr_t *)&uverbs_file->async_file, (uintptr_t)async_file); } + mutex_unlock(&uverbs_file->ucontext_lock); - return filp; - -err_put_file: - fput(filp); - kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file); - uverbs_file->async_file = NULL; - return ERR_PTR(ret); - -err_put_refs: - kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); - kref_put(&ev_file->ref, ib_uverbs_release_event_file); - return filp; + INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev, + ib_uverbs_event_handler); + ib_register_event_handler(&async_file->event_handler); } -/* - * Look up a completion event file by FD. If lookup is successful, - * takes a ref to the event file struct that it returns; if - * unsuccessful, returns NULL. - */ -struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) +static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, + struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count, + const struct uverbs_api_write_method *method_elm) { - struct ib_uverbs_event_file *ev_file = NULL; - struct fd f = fdget(fd); + if (method_elm->is_ex) { + count -= sizeof(*hdr) + sizeof(*ex_hdr); - if (!f.file) - return NULL; + if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count) + return -EINVAL; - if (f.file->f_op != &uverbs_event_fops) - goto out; + if (hdr->in_words * 8 < method_elm->req_size) + return -ENOSPC; - ev_file = f.file->private_data; - if (ev_file->is_async) { - ev_file = NULL; - goto out; - } + if (ex_hdr->cmd_hdr_reserved) + return -EINVAL; - kref_get(&ev_file->ref); + if (ex_hdr->response) { + if (!hdr->out_words && !ex_hdr->provider_out_words) + return -EINVAL; -out: - fdput(f); - return ev_file; -} + if (hdr->out_words * 8 < method_elm->resp_size) + return -ENOSPC; -static int verify_command_mask(struct ib_device *ib_dev, __u32 command) -{ - u64 mask; - - if (command <= IB_USER_VERBS_CMD_OPEN_QP) - mask = ib_dev->uverbs_cmd_mask; - else - mask = ib_dev->uverbs_ex_cmd_mask; + if (!access_ok(u64_to_user_ptr(ex_hdr->response), + (hdr->out_words + ex_hdr->provider_out_words) * 8)) + return -EFAULT; + } else { + if (hdr->out_words || ex_hdr->provider_out_words) + return -EINVAL; + } - if (mask & ((u64)1 << command)) return 0; + } + + /* not extended command */ + if (hdr->in_words * 4 != count) + return -EINVAL; - return -1; + if (count < method_elm->req_size + sizeof(hdr)) { + /* + * rdma-core v18 and v19 have a bug where they send DESTROY_CQ + * with a 16 byte write instead of 24. Old kernels didn't + * check the size so they allowed this. 
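+ * (They pass in_words == 4 instead of the expected 6.)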
Now that the size is + * checked provide a compatibility work around to not break + * those userspaces. + */ + if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ && + count == 16) { + hdr->in_words = 6; + return 0; + } + return -ENOSPC; + } + if (hdr->out_words * 4 < method_elm->resp_size) + return -ENOSPC; + + return 0; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; - struct ib_device *ib_dev; + const struct uverbs_api_write_method *method_elm; + struct uverbs_api *uapi = file->device->uapi; + struct ib_uverbs_ex_cmd_hdr ex_hdr; struct ib_uverbs_cmd_hdr hdr; - __u32 command; - __u32 flags; + struct uverbs_attr_bundle bundle; int srcu_key; ssize_t ret; - if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + if (!ib_safe_file_access(filp)) { + pr_warn_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", + current->pid, current->comm); return -EACCES; + } - if (count < sizeof hdr) + if (count < sizeof(hdr)) return -EINVAL; - if (copy_from_user(&hdr, buf, sizeof hdr)) + if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ib_dev = srcu_dereference(file->device->ib_dev, - &file->device->disassociate_srcu); - if (!ib_dev) { - ret = -EIO; - goto out; - } + method_elm = uapi_get_method(uapi, hdr.command); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); - if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | - IB_USER_VERBS_CMD_COMMAND_MASK)) { - ret = -EINVAL; - goto out; + if (method_elm->is_ex) { + if (count < (sizeof(hdr) + sizeof(ex_hdr))) + return -EINVAL; + if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) + return -EFAULT; } - command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; - if (verify_command_mask(ib_dev, command)) { - ret = -EOPNOTSUPP; - goto out; - } + ret = verify_hdr(&hdr, &ex_hdr, count, method_elm); + if (ret) + return ret; - if (!file->ucontext && - command != IB_USER_VERBS_CMD_GET_CONTEXT) { - ret = -EINVAL; - goto out; - } - - flags = (hdr.command & - IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; - - if (!flags) { - if (command >= ARRAY_SIZE(uverbs_cmd_table) || - !uverbs_cmd_table[command]) { - ret = -EINVAL; - goto out; - } - - if (hdr.in_words * 4 != count) { - ret = -EINVAL; - goto out; - } - - ret = uverbs_cmd_table[command](file, ib_dev, - buf + sizeof(hdr), - hdr.in_words * 4, - hdr.out_words * 4); - - } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { - struct ib_uverbs_ex_cmd_hdr ex_hdr; - struct ib_udata ucore; - struct ib_udata uhw; - size_t written_count = count; - - if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || - !uverbs_ex_cmd_table[command]) { - ret = -ENOSYS; - goto out; - } - - if (!file->ucontext) { - ret = -EINVAL; - goto out; - } - - if (count < (sizeof(hdr) + sizeof(ex_hdr))) { - ret = -EINVAL; - goto out; - } - - if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) { - ret = -EFAULT; - goto out; - } - - count -= sizeof(hdr) + sizeof(ex_hdr); - buf += sizeof(hdr) + sizeof(ex_hdr); - - if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) { - ret = -EINVAL; - goto out; - } + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - if (ex_hdr.cmd_hdr_reserved) { - ret = -EINVAL; - goto out; + buf += sizeof(hdr); + + memset(bundle.attr_present, 0, sizeof(bundle.attr_present)); + bundle.ufile = file; + bundle.context = NULL; /* only valid if 
bundle has uobject */ + if (!method_elm->is_ex) { + size_t in_len = hdr.in_words * 4 - sizeof(hdr); + size_t out_len = hdr.out_words * 4; + u64 response = 0; + + if (method_elm->has_udata) { + bundle.driver_udata.inlen = + in_len - method_elm->req_size; + in_len = method_elm->req_size; + if (bundle.driver_udata.inlen) + bundle.driver_udata.inbuf = buf + in_len; + else + bundle.driver_udata.inbuf = NULL; + } else { + memset(&bundle.driver_udata, 0, + sizeof(bundle.driver_udata)); } - if (ex_hdr.response) { - if (!hdr.out_words && !ex_hdr.provider_out_words) { - ret = -EINVAL; - goto out; - } - - if (!access_ok((void __user *) (unsigned long) ex_hdr.response, - (hdr.out_words + ex_hdr.provider_out_words) * 8)) { - ret = -EFAULT; - goto out; + if (method_elm->has_resp) { + /* + * The macros check that if has_resp is set + * then the command request structure starts + * with a '__aligned u64 response' member. + */ + ret = get_user(response, (const u64 __user *)buf); + if (ret) + goto out_unlock; + + if (method_elm->has_udata) { + bundle.driver_udata.outlen = + out_len - method_elm->resp_size; + out_len = method_elm->resp_size; + if (bundle.driver_udata.outlen) + bundle.driver_udata.outbuf = + u64_to_user_ptr(response + + out_len); + else + bundle.driver_udata.outbuf = NULL; } } else { - if (hdr.out_words || ex_hdr.provider_out_words) { - ret = -EINVAL; - goto out; - } + bundle.driver_udata.outlen = 0; + bundle.driver_udata.outbuf = NULL; } - ib_uverbs_init_udata_buf_or_null(&ucore, buf, - u64_to_user_ptr(ex_hdr.response), - hdr.in_words * 8, hdr.out_words * 8); - - ib_uverbs_init_udata_buf_or_null(&uhw, - buf + ucore.inlen, - u64_to_user_ptr(ex_hdr.response + ucore.outlen), - ex_hdr.provider_in_words * 8, - ex_hdr.provider_out_words * 8); - - ret = uverbs_ex_cmd_table[command](file, - ib_dev, - &ucore, - &uhw); - if (!ret) - ret = written_count; + ib_uverbs_init_udata_buf_or_null( + &bundle.ucore, buf, u64_to_user_ptr(response), + in_len, out_len); } else { - ret = -ENOSYS; + buf += sizeof(ex_hdr); + + ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf, + u64_to_user_ptr(ex_hdr.response), + hdr.in_words * 8, hdr.out_words * 8); + + ib_uverbs_init_udata_buf_or_null( + &bundle.driver_udata, buf + bundle.ucore.inlen, + u64_to_user_ptr(ex_hdr.response + bundle.ucore.outlen), + ex_hdr.provider_in_words * 8, + ex_hdr.provider_out_words * 8); + } -out: + ret = method_elm->handler(&bundle); +out_unlock: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); - return ret; + return (ret) ? 
: count;
 }
 
+static const struct vm_operations_struct rdma_umap_ops;
+
 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 struct ib_uverbs_file *file = filp->private_data;
- struct ib_device *ib_dev;
+ struct ib_ucontext *ucontext;
 int ret = 0;
 int srcu_key;
 
 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- ib_dev = srcu_dereference(file->device->ib_dev,
- &file->device->disassociate_srcu);
- if (!ib_dev) {
- ret = -EIO;
+ ucontext = ib_uverbs_get_ucontext_file(file);
+ if (IS_ERR(ucontext)) {
+ ret = PTR_ERR(ucontext);
 goto out;
 }
-
- if (!file->ucontext)
- ret = -ENODEV;
- else
- ret = ib_dev->mmap(file->ucontext, vma);
+ vma->vm_ops = &rdma_umap_ops;
+ ret = ucontext->device->mmap(ucontext, vma);
 out:
 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
 return ret;
 }
 
+/*
+ * The VMA has been dup'd; initialize the vm_private_data with a new tracking
+ * struct.
+ */
+static void rdma_umap_open(struct vm_area_struct *vma)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+ struct rdma_umap_priv *opriv = vma->vm_private_data;
+ struct rdma_umap_priv *priv;
+
+ if (!opriv)
+ return;
+
+ /* We are racing with disassociation */
+ if (!down_read_trylock(&ufile->hw_destroy_rwsem))
+ goto out_zap;
+ /*
+ * Disassociation already completed; the VMA should already be zapped.
+ */
+ if (!ufile->ucontext)
+ goto out_unlock;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ goto out_unlock;
+ rdma_umap_priv_init(priv, vma, opriv->entry);
+
+ up_read(&ufile->hw_destroy_rwsem);
+ return;
+
+out_unlock:
+ up_read(&ufile->hw_destroy_rwsem);
+out_zap:
+ /*
+ * We can't allow the VMA to be created with the actual IO pages; that
+ * would break our API contract, and it can't be stopped at this
+ * point, so zap it.
+ */
+ vma->vm_private_data = NULL;
+ zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
+static void rdma_umap_close(struct vm_area_struct *vma)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+ struct rdma_umap_priv *priv = vma->vm_private_data;
+
+ if (!priv)
+ return;
+
+ /*
+ * The vma holds a reference on the struct file that created it, which
+ * in turn means that the ib_uverbs_file is guaranteed to exist at
+ * this point.
+ */
+ mutex_lock(&ufile->umap_lock);
+
+ list_del(&priv->list);
+ mutex_unlock(&ufile->umap_lock);
+ kfree(priv);
+}
+
+static const struct vm_operations_struct rdma_umap_ops = {
+ .open = rdma_umap_open,
+ .close = rdma_umap_close,
+};
+
+void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
+{
+ struct rdma_umap_priv *priv, *next_priv;
+
+ lockdep_assert_held(&ufile->hw_destroy_rwsem);
+
+ while (1) {
+ struct mm_struct *mm = NULL;
+
+ /* Get an arbitrary mm pointer that hasn't been cleaned yet */
+ mutex_lock(&ufile->umap_lock);
+ while (!list_empty(&ufile->umaps)) {
+ int ret;
+
+ priv = list_first_entry(&ufile->umaps,
+ struct rdma_umap_priv, list);
+ mm = priv->vma->vm_mm;
+ ret = mmget_not_zero(mm);
+ if (!ret) {
+ list_del_init(&priv->list);
+ mm = NULL;
+ continue;
+ }
+ break;
+ }
+ mutex_unlock(&ufile->umap_lock);
+ if (!mm)
+ return;
+
+ /*
+ * The umap_lock is nested under mmap_sem since it is used within
+ * the vma_ops callbacks, so we have to clean the list one mm
+ * at a time to get the lock ordering right. Typically there
+ * will only be one mm, so no big deal. 
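+ *
+ * Roughly, each pass of the loop below is (a restatement of the
+ * code that follows, with mmap_sem always taken first):
+ *
+ *   down_read(&mm->mmap_sem);
+ *   mutex_lock(&ufile->umap_lock);
+ *   zap_vma_ptes() on every listed VMA belonging to this mm;
+ *   mutex_unlock(&ufile->umap_lock);
+ *   up_read(&mm->mmap_sem);
+ *   mmput(mm);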
+ */ + down_read(&mm->mmap_sem); + mutex_lock(&ufile->umap_lock); + list_for_each_entry_safe (priv, next_priv, &ufile->umaps, + list) { + struct vm_area_struct *vma = priv->vma; + + if (vma->vm_mm != mm) + continue; + list_del_init(&priv->list); + + zap_vma_ptes(vma, vma->vm_start, + vma->vm_end - vma->vm_start); + } + mutex_unlock(&ufile->umap_lock); + up_read(&mm->mmap_sem); + mmput(mm); + } +} + /* * ib_uverbs_open() does not need the BKL: * @@ -964,6 +835,7 @@ if (!atomic_inc_not_zero(&dev->refcount)) return -ENXIO; + get_device(&dev->dev); srcu_key = srcu_read_lock(&dev->disassociate_srcu); mutex_lock(&dev->lists_mutex); ib_dev = srcu_dereference(dev->ib_dev, @@ -995,18 +867,22 @@ } file->device = dev; - file->ucontext = NULL; - file->async_file = NULL; kref_init(&file->ref); - mutex_init(&file->mutex); - mutex_init(&file->cleanup_mutex); + mutex_init(&file->ucontext_lock); + + spin_lock_init(&file->uobjects_lock); + INIT_LIST_HEAD(&file->uobjects); + init_rwsem(&file->hw_destroy_rwsem); + mutex_init(&file->umap_lock); + INIT_LIST_HEAD(&file->umaps); filp->private_data = file; - kobject_get(&dev->kobj); list_add_tail(&file->list, &dev->uverbs_file_list); mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); + setup_ufile_idr_uobject(file); + return nonseekable_open(inode, filp); err_module: @@ -1018,33 +894,21 @@ if (atomic_dec_and_test(&dev->refcount)) ib_uverbs_comp_dev(dev); + put_device(&dev->dev); return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - struct ib_uverbs_device *dev = file->device; - mutex_lock(&file->cleanup_mutex); - if (file->ucontext) { - ib_uverbs_cleanup_ucontext(file, file->ucontext); - file->ucontext = NULL; - } - mutex_unlock(&file->cleanup_mutex); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); mutex_lock(&file->device->lists_mutex); - if (!file->is_closed) { - list_del(&file->list); - file->is_closed = 1; - } + list_del_init(&file->list); mutex_unlock(&file->device->lists_mutex); - if (file->async_file) - kref_put(&file->async_file->ref, ib_uverbs_release_event_file); - kref_put(&file->ref, ib_uverbs_release_file); - kobject_put(&dev->kobj); return 0; } @@ -1055,6 +919,8 @@ .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, + .unlocked_ioctl = ib_uverbs_ioctl, + .compat_ioctl = NULL, }; static const struct file_operations uverbs_mmap_fops = { @@ -1064,6 +930,8 @@ .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, + .unlocked_ioctl = ib_uverbs_ioctl, + .compat_ioctl = NULL, }; static struct ib_client uverbs_client = { @@ -1072,17 +940,15 @@ .remove = ib_uverbs_remove_one }; -static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, +static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, char *buf) { + struct ib_uverbs_device *dev = + container_of(device, struct ib_uverbs_device, dev); int ret = -ENODEV; int srcu_key; - struct ib_uverbs_device *dev = dev_get_drvdata(device); struct ib_device *ib_dev; - if (!dev) - return -ENODEV; - srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) @@ -1091,18 +957,17 @@ return ret; } -static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static DEVICE_ATTR(ibdev, S_IRUGO, ibdev_show, NULL); -static ssize_t show_dev_abi_version(struct device *device, - struct device_attribute *attr, char *buf) +static ssize_t abi_version_show(struct device 
*device, + struct device_attribute *attr, char *buf) { - struct ib_uverbs_device *dev = dev_get_drvdata(device); + struct ib_uverbs_device *dev = + container_of(device, struct ib_uverbs_device, dev); int ret = -ENODEV; int srcu_key; struct ib_device *ib_dev; - if (!dev) - return -ENODEV; srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) @@ -1111,45 +976,31 @@ return ret; } -static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); +static DEVICE_ATTR(abi_version, S_IRUGO, abi_version_show, NULL); static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_VERBS_ABI_VERSION)); -static dev_t overflow_maj; -static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES); - -/* - * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by - * requesting a new major number and doubling the number of max devices we - * support. It's stupid, but simple. - */ -static int find_overflow_devnum(void) +static int ib_uverbs_create_uapi(struct ib_device *device, + struct ib_uverbs_device *uverbs_dev) { - int ret; + struct uverbs_api *uapi; - if (!overflow_maj) { - ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES, - "infiniband_verbs"); - if (ret) { - pr_err("user_verbs: couldn't register dynamic device number\n"); - return ret; - } - } + uapi = uverbs_alloc_api(device); + if (IS_ERR(uapi)) + return PTR_ERR(uapi); - ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES); - if (ret >= IB_UVERBS_MAX_DEVICES) - return -1; - - return ret; + uverbs_dev->uapi = uapi; + return 0; } static ssize_t show_dev_device(struct device *device, struct device_attribute *attr, char *buf) { - struct ib_uverbs_device *dev = dev_get_drvdata(device); + struct ib_uverbs_device *dev = + container_of(device, struct ib_uverbs_device, dev); - if (!dev || !dev->ib_dev->dma_device) + if (!dev->ib_dev->dma_device) return -ENODEV; return sprintf(buf, "0x%04x\n", @@ -1160,9 +1011,10 @@ static ssize_t show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf) { - struct ib_uverbs_device *dev = dev_get_drvdata(device); + struct ib_uverbs_device *dev = + container_of(device, struct ib_uverbs_device, dev); - if (!dev || !dev->ib_dev->dma_device) + if (!dev->ib_dev->dma_device) return -ENODEV; return sprintf(buf, "0x%04x\n", @@ -1170,16 +1022,15 @@ } static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL); -struct attribute *device_attrs[] = -{ +static struct attribute *ib_dev_attrs[] = { &dev_attr_device.attr, &dev_attr_vendor.attr, - NULL + NULL, }; -static struct attribute_group device_group = { - .name = "device", - .attrs = device_attrs +static const struct attribute_group dev_attr_group = { + .name = "device", + .attrs = ib_dev_attrs, }; static void ib_uverbs_add_one(struct ib_device *device) @@ -1192,7 +1043,7 @@ if (!device->alloc_ucontext) return; - uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL); + uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); if (!uverbs_dev) return; @@ -1202,77 +1053,63 @@ return; } + uverbs_dev->dev.class = uverbs_class; + uverbs_dev->dev.parent = device->dev.parent; + uverbs_dev->dev.release = ib_uverbs_release_dev; + device_initialize(&uverbs_dev->dev); atomic_set(&uverbs_dev->refcount, 1); init_completion(&uverbs_dev->comp); uverbs_dev->xrcd_tree = RB_ROOT; mutex_init(&uverbs_dev->xrcd_tree_mutex); - kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype); mutex_init(&uverbs_dev->lists_mutex); INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); - 
INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); - - spin_lock(&map_lock); - devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); - if (devnum >= IB_UVERBS_MAX_DEVICES) { - spin_unlock(&map_lock); - devnum = find_overflow_devnum(); - if (devnum < 0) - goto err; - - spin_lock(&map_lock); - uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES; - base = devnum + overflow_maj; - set_bit(devnum, overflow_map); - } else { - uverbs_dev->devnum = devnum; - base = devnum + IB_UVERBS_BASE_DEV; - set_bit(devnum, dev_map); - } - spin_unlock(&map_lock); - rcu_assign_pointer(uverbs_dev->ib_dev, device); uverbs_dev->num_comp_vectors = device->num_comp_vectors; - cdev_init(&uverbs_dev->cdev, NULL); + devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1, + GFP_KERNEL); + if (devnum < 0) + goto err; + uverbs_dev->devnum = devnum; + if (devnum >= IB_UVERBS_NUM_FIXED_MINOR) + base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR; + else + base = IB_UVERBS_BASE_DEV + devnum; + + if (ib_uverbs_create_uapi(device, uverbs_dev)) + goto err_uapi; + + uverbs_dev->dev.devt = base; + dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum); + + cdev_init(&uverbs_dev->cdev, + device->mmap ? &uverbs_mmap_fops : &uverbs_fops); uverbs_dev->cdev.owner = THIS_MODULE; - uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; - uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj; + kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); - if (cdev_add(&uverbs_dev->cdev, base, 1)) - goto err_cdev; + ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev); + if (ret) + goto err_uapi; - uverbs_dev->dev = device_create(uverbs_class, device->dma_device, - uverbs_dev->cdev.dev, uverbs_dev, - "uverbs%d", uverbs_dev->devnum); - if (IS_ERR(uverbs_dev->dev)) + if (device_create_file(&uverbs_dev->dev, &dev_attr_ibdev)) + goto err_cdev; + if (device_create_file(&uverbs_dev->dev, &dev_attr_abi_version)) + goto err_cdev; + if (sysfs_create_group(&uverbs_dev->dev.kobj, &dev_attr_group)) goto err_cdev; - - if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) - goto err_class; - if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) - goto err_class; - if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group)) - goto err_class; ib_set_client_data(device, &uverbs_client, uverbs_dev); - return; -err_class: - device_destroy(uverbs_class, uverbs_dev->cdev.dev); - err_cdev: - cdev_del(&uverbs_dev->cdev); - if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) - clear_bit(devnum, dev_map); - else - clear_bit(devnum, overflow_map); - + cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev); +err_uapi: + ida_free(&uverbs_ida, devnum); err: if (atomic_dec_and_test(&uverbs_dev->refcount)) ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); - kobject_put(&uverbs_dev->kobj); + put_device(&uverbs_dev->dev); return; } @@ -1280,70 +1117,35 @@ struct ib_device *ib_dev) { struct ib_uverbs_file *file; - struct ib_uverbs_event_file *event_file; - struct ib_event event; /* Pending running commands to terminate */ - synchronize_srcu(&uverbs_dev->disassociate_srcu); - event.event = IB_EVENT_DEVICE_FATAL; - event.element.port_num = 0; - event.device = ib_dev; + uverbs_disassociate_api_pre(uverbs_dev); mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { - struct ib_ucontext *ucontext; file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); - file->is_closed = 1; - list_del(&file->list); + 
list_del_init(&file->list); kref_get(&file->ref); - mutex_unlock(&uverbs_dev->lists_mutex); - - mutex_lock(&file->cleanup_mutex); - ucontext = file->ucontext; - file->ucontext = NULL; - mutex_unlock(&file->cleanup_mutex); - - /* At this point ib_uverbs_close cannot be running - * ib_uverbs_cleanup_ucontext + /* We must release the mutex before going ahead and calling + * uverbs_cleanup_ufile, as it might end up indirectly calling + * uverbs_close, for example due to freeing the resources (e.g + * mmput). */ - if (ucontext) { - /* We must release the mutex before going ahead and - * calling disassociate_ucontext. disassociate_ucontext - * might end up indirectly calling uverbs_close, - * for example due to freeing the resources - * (e.g mmput). - */ - ib_uverbs_event_handler(&file->event_handler, &event); - ib_dev->disassociate_ucontext(ucontext); - ib_uverbs_cleanup_ucontext(file, ucontext); - } + mutex_unlock(&uverbs_dev->lists_mutex); - mutex_lock(&uverbs_dev->lists_mutex); - kref_put(&file->ref, ib_uverbs_release_file); - } + ib_uverbs_async_handler(READ_ONCE(file->async_file), 0, + IB_EVENT_DEVICE_FATAL, NULL, NULL); - while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { - event_file = list_first_entry(&uverbs_dev-> - uverbs_events_file_list, - struct ib_uverbs_event_file, - list); - spin_lock_irq(&event_file->lock); - event_file->is_closed = 1; - spin_unlock_irq(&event_file->lock); - - list_del(&event_file->list); - if (event_file->is_async) { - ib_unregister_event_handler(&event_file->uverbs_file-> - event_handler); - event_file->uverbs_file->event_handler.device = NULL; - } + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); + kref_put(&file->ref, ib_uverbs_release_file); - wake_up_interruptible(&event_file->poll_wait); - kill_fasync(&event_file->async_queue, SIGIO, POLL_IN); + mutex_lock(&uverbs_dev->lists_mutex); } mutex_unlock(&uverbs_dev->lists_mutex); + + uverbs_disassociate_api(uverbs_dev->uapi); } static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) @@ -1354,15 +1156,8 @@ if (!uverbs_dev) return; - sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group); - dev_set_drvdata(uverbs_dev->dev, NULL); - device_destroy(uverbs_class, uverbs_dev->cdev.dev); - cdev_del(&uverbs_dev->cdev); - - if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) - clear_bit(uverbs_dev->devnum, dev_map); - else - clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); + cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev); + ida_free(&uverbs_ida, uverbs_dev->devnum); if (device->disassociate_ucontext) { /* We disassociate HW resources and immediately return. @@ -1376,7 +1171,6 @@ * cdev was deleted, however active clients can still issue * commands and close their open files. 
*/ - rcu_assign_pointer(uverbs_dev->ib_dev, NULL); ib_uverbs_free_hw_resources(uverbs_dev, device); wait_clients = 0; } @@ -1385,7 +1179,8 @@ ib_uverbs_comp_dev(uverbs_dev); if (wait_clients) wait_for_completion(&uverbs_dev->comp); - kobject_put(&uverbs_dev->kobj); + + put_device(&uverbs_dev->dev); } static char *uverbs_devnode(struct device *dev, umode_t *mode) @@ -1399,13 +1194,22 @@ { int ret; - ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, + ret = register_chrdev_region(IB_UVERBS_BASE_DEV, + IB_UVERBS_NUM_FIXED_MINOR, "infiniband_verbs"); if (ret) { pr_err("user_verbs: couldn't register device number\n"); goto out; } + ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0, + IB_UVERBS_NUM_DYNAMIC_MINOR, + "infiniband_verbs"); + if (ret) { + pr_err("couldn't register dynamic device number\n"); + goto out_alloc; + } + uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); if (IS_ERR(uverbs_class)) { ret = PTR_ERR(uverbs_class); @@ -1433,7 +1237,12 @@ class_destroy(uverbs_class); out_chrdev: - unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); + unregister_chrdev_region(dynamic_uverbs_dev, + IB_UVERBS_NUM_DYNAMIC_MINOR); + +out_alloc: + unregister_chrdev_region(IB_UVERBS_BASE_DEV, + IB_UVERBS_NUM_FIXED_MINOR); out: return ret; @@ -1443,16 +1252,10 @@ { ib_unregister_client(&uverbs_client); class_destroy(uverbs_class); - unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); - if (overflow_maj) - unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); - idr_destroy(&ib_uverbs_pd_idr); - idr_destroy(&ib_uverbs_mr_idr); - idr_destroy(&ib_uverbs_mw_idr); - idr_destroy(&ib_uverbs_ah_idr); - idr_destroy(&ib_uverbs_cq_idr); - idr_destroy(&ib_uverbs_qp_idr); - idr_destroy(&ib_uverbs_srq_idr); + unregister_chrdev_region(IB_UVERBS_BASE_DEV, + IB_UVERBS_NUM_FIXED_MINOR); + unregister_chrdev_region(dynamic_uverbs_dev, + IB_UVERBS_NUM_DYNAMIC_MINOR); } module_init_order(ib_uverbs_init, SI_ORDER_FIFTH); diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types.c new file mode 100644 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types.c @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_ah(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ return ib_destroy_ah_user((struct ib_ah *)uobject->object,
+ RDMA_DESTROY_AH_SLEEPABLE,
+ &attrs->driver_udata);
+}
+
+static int uverbs_free_flow(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_flow *flow = (struct ib_flow *)uobject->object;
+ struct ib_uflow_object *uflow =
+ container_of(uobject, struct ib_uflow_object, uobject);
+ struct ib_qp *qp = flow->qp;
+ int ret;
+
+ ret = flow->device->destroy_flow(flow);
+ if (!ret) {
+ if (qp)
+ atomic_dec(&qp->usecnt);
+ ib_uverbs_flow_resources_free(uflow->resources);
+ }
+
+ return ret;
+}
+
+static int uverbs_free_mw(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
+}
+
+static int uverbs_free_qp(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_qp *qp = uobject->object;
+ struct ib_uqp_object *uqp =
+ container_of(uobject, struct ib_uqp_object, uevent.uobject);
+ int ret;
+
+ /*
+ * If this is a user-triggered destroy then do not allow destruction
+ * until the user cleans up all the mcast bindings. Unlike in other
+ * places we forcibly clean up the mcast attachments for !DESTROY
+ * because the mcast attaches are not uobjects and will not be
+ * destroyed by anything else during cleanup processing. 
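+ *
+ * In short, the branch below reduces to:
+ *
+ *   RDMA_REMOVE_DESTROY with mcast bindings left -> return -EBUSY
+ *   any other removal reason on a real QP -> ib_uverbs_detach_umcast()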
+ */ + if (why == RDMA_REMOVE_DESTROY) { + if (!list_empty(&uqp->mcast_list)) + return -EBUSY; + } else if (qp == qp->real_qp) { + ib_uverbs_detach_umcast(qp, uqp); + } + + ret = ib_destroy_qp_user(qp, &attrs->driver_udata); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + if (uqp->uxrcd) + atomic_dec(&uqp->uxrcd->refcnt); + + ib_uverbs_release_uevent(&uqp->uevent); + return ret; +} + +static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object; + struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; + int ret; + + ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + kfree(ind_tbl); + return ret; +} + +static int uverbs_free_wq(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct ib_wq *wq = uobject->object; + struct ib_uwq_object *uwq = + container_of(uobject, struct ib_uwq_object, uevent.uobject); + int ret; + + ret = ib_destroy_wq(wq, &attrs->driver_udata); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + ib_uverbs_release_uevent(&uwq->uevent); + return ret; +} + +static int uverbs_free_srq(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct ib_srq *srq = uobject->object; + struct ib_uevent_object *uevent = + container_of(uobject, struct ib_uevent_object, uobject); + enum ib_srq_type srq_type = srq->srq_type; + int ret; + + ret = ib_destroy_srq_user(srq, &attrs->driver_udata); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + if (srq_type == IB_SRQT_XRC) { + struct ib_usrq_object *us = + container_of(uevent, struct ib_usrq_object, uevent); + + atomic_dec(&us->uxrcd->refcnt); + } + + ib_uverbs_release_uevent(uevent); + return ret; +} + +static int uverbs_free_xrcd(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct ib_xrcd *xrcd = uobject->object; + struct ib_uxrcd_object *uxrcd = + container_of(uobject, struct ib_uxrcd_object, uobject); + int ret; + + ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject); + if (ret) + return ret; + + mutex_lock(&attrs->ufile->device->xrcd_tree_mutex); + ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs); + mutex_unlock(&attrs->ufile->device->xrcd_tree_mutex); + + return ret; +} + +static int uverbs_free_pd(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct ib_pd *pd = uobject->object; + int ret; + + ret = ib_destroy_usecnt(&pd->usecnt, why, uobject); + if (ret) + return ret; + + ib_dealloc_pd_user(pd, &attrs->driver_udata); + return 0; +} + +void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue) +{ + struct ib_uverbs_event *entry, *tmp; + + spin_lock_irq(&event_queue->lock); + /* + * The user must ensure that no new items are added to the event_list + * once is_closed is set. 
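+ *
+ * The teardown below follows from that rule: mark the queue
+ * closed under the lock, wake readers and SIGIO listeners, then
+ * re-take the lock and free every entry still queued.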
+ */ + event_queue->is_closed = 1; + spin_unlock_irq(&event_queue->lock); + wake_up_interruptible(&event_queue->poll_wait); + kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN); + + spin_lock_irq(&event_queue->lock); + list_for_each_entry_safe(entry, tmp, &event_queue->event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + list_del(&entry->list); + kfree(entry); + } + spin_unlock_irq(&event_queue->lock); +} + +static int +uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + struct ib_uverbs_completion_event_file *file = + container_of(uobj, struct ib_uverbs_completion_event_file, + uobj); + + ib_uverbs_free_event_queue(&file->ev_queue); + return 0; +} + +int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) +{ + return 0; +} +EXPORT_SYMBOL(uverbs_destroy_def_handler); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), + uverbs_completion_event_file_destroy_uobj, + &uverbs_event_fops, + "[infinibandevent]", + FMODE_READ)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_QP, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_MW_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MW_HANDLE, + UVERBS_OBJECT_MW, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw), + &UVERBS_METHOD(UVERBS_METHOD_MW_DESTROY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_SRQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), + uverbs_free_srq)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_AH_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_AH_HANDLE, + UVERBS_OBJECT_AH, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah), + &UVERBS_METHOD(UVERBS_METHOD_AH_DESTROY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_FLOW_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_FLOW, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), + uverbs_free_flow), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_DESTROY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_WQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_RWQ_IND_TBL_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE, + UVERBS_OBJECT_RWQ_IND_TBL, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl), + &UVERBS_METHOD(UVERBS_METHOD_RWQ_IND_TBL_DESTROY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_XRCD_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_XRCD_HANDLE, + UVERBS_OBJECT_XRCD, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_XRCD, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), + uverbs_free_xrcd), + &UVERBS_METHOD(UVERBS_METHOD_XRCD_DESTROY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_PD_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd), + &UVERBS_METHOD(UVERBS_METHOD_PD_DESTROY)); + +const struct 
uapi_definition uverbs_def_obj_intf[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_PD, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP, + UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH, + UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ, + UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW, + UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ, + UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + UVERBS_OBJECT_RWQ_IND_TBL, + UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_XRCD, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)), + {} +}; diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_async_fd.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_async_fd.c new file mode 100644 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_async_fd.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include +#include "rdma_core.h" +#include "uverbs.h" + +#include + +static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_METHOD_ASYNC_EVENT_ALLOC); + + ib_uverbs_init_async_event_file( + container_of(uobj, struct ib_uverbs_async_event_file, uobj)); + return 0; +} + +static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + struct ib_uverbs_async_event_file *event_file = + container_of(uobj, struct ib_uverbs_async_event_file, uobj); + + ib_unregister_event_handler(&event_file->event_handler); + ib_uverbs_free_event_queue(&event_file->ev_queue); + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_ASYNC_EVENT_ALLOC, + UVERBS_ATTR_FD(UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE, + UVERBS_OBJECT_ASYNC_EVENT, + UVERBS_ACCESS_NEW, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_ASYNC_EVENT, + UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_async_event_file), + uverbs_async_event_destroy_uobj, + &uverbs_async_event_fops, + "[infinibandevent]", + FMODE_READ), + &UVERBS_METHOD(UVERBS_METHOD_ASYNC_EVENT_ALLOC)); + +const struct uapi_definition uverbs_def_obj_async_fd[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_ASYNC_EVENT), + {} +}; diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_counters.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_counters.c new file mode 100644 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_counters.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rdma_core.h"
+#include "uverbs.h"
+#include
+
+#include
+
+static int uverbs_free_counters(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_counters *counters = uobject->object;
+ int ret;
+
+ ret = ib_destroy_usecnt(&counters->usecnt, why, uobject);
+ if (ret)
+ return ret;
+
+ return counters->device->destroy_counters(counters);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
+ struct ib_device *ib_dev = attrs->context->device;
+ struct ib_counters *counters;
+ int ret;
+
+ /*
+ * This check should be removed once the infrastructure
+ * has the ability to remove methods from the parse tree once
+ * such a condition is met. 
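+ *
+ * Until the parse tree can do that, the check just below falls
+ * back to returning -EOPNOTSUPP at run time when the driver does
+ * not implement create_counters.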
+ */ + if (!ib_dev->create_counters) + return -EOPNOTSUPP; + + counters = ib_dev->create_counters(ib_dev, attrs); + if (IS_ERR(counters)) { + ret = PTR_ERR(counters); + goto err_create_counters; + } + + counters->device = ib_dev; + counters->uobject = uobj; + uobj->object = counters; + atomic_set(&counters->usecnt, 0); + + return 0; + +err_create_counters: + return ret; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_counters_read_attr read_attr = {}; + const struct uverbs_attr *uattr; + struct ib_counters *counters = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE); + int ret; + + if (!counters->device->read_counters) + return -EOPNOTSUPP; + + if (!atomic_read(&counters->usecnt)) + return -EINVAL; + + ret = uverbs_get_flags32(&read_attr.flags, attrs, + UVERBS_ATTR_READ_COUNTERS_FLAGS, + IB_UVERBS_READ_COUNTERS_PREFER_CACHED); + if (ret) + return ret; + + uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF); + read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64); + read_attr.counters_buff = uverbs_zalloc( + attrs, array_size(read_attr.ncounters, sizeof(u64))); + if (IS_ERR(read_attr.counters_buff)) + return PTR_ERR(read_attr.counters_buff); + + ret = counters->device->read_counters(counters, &read_attr, attrs); + if (ret) + return ret; + + return uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF, + read_attr.counters_buff, + read_attr.ncounters * sizeof(u64)); +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_COUNTERS_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_NEW, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_COUNTERS_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_COUNTERS_READ, + UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, + UVERBS_ATTR_MIN_SIZE(0), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, + enum ib_uverbs_read_counters_flags)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters), + &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY), + &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ)); + +const struct uapi_definition uverbs_def_obj_counters[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COUNTERS, + UAPI_DEF_OBJ_NEEDS_FN(destroy_counters)), + {} +}; diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_cq.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_cq.c new file mode 100644 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_cq.c @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "rdma_core.h" +#include "uverbs.h" + +static int uverbs_free_cq(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct ib_cq *cq = uobject->object; + struct ib_uverbs_event_queue *ev_queue = cq->cq_context; + struct ib_ucq_object *ucq = + container_of(uobject, struct ib_ucq_object, uevent.uobject); + int ret; + + ret = ib_destroy_cq_user(cq, &attrs->driver_udata); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + ib_uverbs_release_ucq( + ev_queue ? 
container_of(ev_queue, + struct ib_uverbs_completion_event_file, + ev_queue) : + NULL, + ucq); + return ret; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_ucq_object *obj = container_of( + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), + typeof(*obj), uevent.uobject); + struct ib_device *ib_dev = attrs->context->device; + int ret; + u64 user_handle; + struct ib_cq_init_attr attr = {}; + struct ib_cq *cq; + struct ib_uverbs_completion_event_file *ev_file = NULL; + struct ib_uobject *ev_file_uobj; + + if (!ib_dev->create_cq || !ib_dev->destroy_cq) + return -EOPNOTSUPP; + + ret = uverbs_copy_from(&attr.comp_vector, attrs, + UVERBS_ATTR_CREATE_CQ_COMP_VECTOR); + if (!ret) + ret = uverbs_copy_from(&attr.cqe, attrs, + UVERBS_ATTR_CREATE_CQ_CQE); + if (!ret) + ret = uverbs_copy_from(&user_handle, attrs, + UVERBS_ATTR_CREATE_CQ_USER_HANDLE); + if (ret) + return ret; + + ret = uverbs_get_flags32(&attr.flags, attrs, + UVERBS_ATTR_CREATE_CQ_FLAGS, + IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION | + IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN); + if (ret) + return ret; + + ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL); + if (!IS_ERR(ev_file_uobj)) { + ev_file = container_of(ev_file_uobj, + struct ib_uverbs_completion_event_file, + uobj); + uverbs_uobject_get(ev_file_uobj); + } + + if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) { + ret = -EINVAL; + goto err_event_file; + } + + INIT_LIST_HEAD(&obj->comp_list); + INIT_LIST_HEAD(&obj->uevent.event_list); + + cq = rdma_zalloc_drv_obj(ib_dev, ib_cq); + if (!cq) { + ret = -ENOMEM; + goto err_event_file; + } + + cq->device = ib_dev; + cq->uobject = obj; + cq->comp_handler = ib_uverbs_comp_handler; + cq->event_handler = ib_uverbs_cq_event_handler; + cq->cq_context = ev_file ? 
&ev_file->ev_queue : NULL; + atomic_set(&cq->usecnt, 0); + + ret = ib_dev->create_cq(cq, &attr, &attrs->driver_udata); + if (ret) + goto err_free; + + obj->uevent.uobject.object = cq; + obj->uevent.uobject.user_handle = user_handle; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe, + sizeof(cq->cqe)); + if (ret) + goto err_cq; + + return 0; +err_cq: + ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); + cq = NULL; +err_free: + kfree(cq); +err_event_file: + if (ev_file) + uverbs_uobject_put(ev_file_uobj); + return ret; +}; + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_CQ_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, + enum ib_uverbs_ex_create_cq_flags), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_UHW()); + +static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE); + struct ib_ucq_object *obj = + container_of(uobj, struct ib_ucq_object, uevent.uobject); + struct ib_uverbs_destroy_cq_resp resp = { + .comp_events_reported = obj->comp_events_reported, + .async_events_reported = obj->uevent.events_reported + }; + + return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp, + sizeof(resp)); +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_CQ_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, + UVERBS_OBJECT_CQ, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP, + UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_CQ, + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq), + +#if 1 /* CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI */ + &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) +#endif +); + +const struct uapi_definition uverbs_def_obj_cq[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_CQ, + UAPI_DEF_OBJ_NEEDS_FN(destroy_cq)), + {} +}; diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_device.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_device.c new file mode 100644 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_device.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include "rdma_core.h" +#include "uverbs.h" +#include + +/* + * This ioctl method allows calling any defined write or write_ex + * handler. This essentially replaces the hdr/ex_hdr system with the ioctl + * marshalling, and brings the non-ex path into the same marshalling as the ex + * path. 
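+ *
+ * Roughly, a caller supplies (see the attribute spec declared
+ * further down):
+ *
+ *   UVERBS_ATTR_WRITE_CMD - which write command to invoke
+ *   UVERBS_ATTR_CORE_IN   - the legacy request payload (optional)
+ *   UVERBS_ATTR_CORE_OUT  - the legacy response buffer (optional)
+ *
+ * The handler checks both payloads against the method's
+ * req_size/resp_size before dispatching to the same handler the
+ * write() path uses.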
+ */ +static int UVERBS_HANDLER(UVERBS_METHOD_INVOKE_WRITE)( + struct uverbs_attr_bundle *attrs) +{ + struct uverbs_api *uapi = attrs->ufile->device->uapi; + const struct uverbs_api_write_method *method_elm; + u32 cmd; + int rc; + + rc = uverbs_get_const(&cmd, attrs, UVERBS_ATTR_WRITE_CMD); + if (rc) + return rc; + + method_elm = uapi_get_method(uapi, cmd); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); + + uverbs_fill_udata(attrs, &attrs->ucore, UVERBS_ATTR_CORE_IN, + UVERBS_ATTR_CORE_OUT); + + if (attrs->ucore.inlen < method_elm->req_size || + attrs->ucore.outlen < method_elm->resp_size) + return -ENOSPC; + + return method_elm->handler(attrs); +} + +DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE, + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_WRITE_CMD, + enum ib_uverbs_write_cmds, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CORE_IN, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), + UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CORE_OUT, + UVERBS_ATTR_MIN_SIZE(0), + UA_OPTIONAL), + UVERBS_ATTR_UHW()); + +static uint32_t * +gather_objects_handle(struct ib_uverbs_file *ufile, + const struct uverbs_api_object *uapi_object, + struct uverbs_attr_bundle *attrs, + ssize_t out_len, + u64 *total) +{ + u64 max_count = out_len / sizeof(u32); + struct ib_uobject *obj; + u64 count = 0; + u32 *handles; + + /* Allocated memory that cannot page out where we gather + * all object ids under a spin_lock. + */ + handles = uverbs_zalloc(attrs, out_len); + if (IS_ERR(handles)) + return handles; + + spin_lock_irq(&ufile->uobjects_lock); + list_for_each_entry(obj, &ufile->uobjects, list) { + u32 obj_id = obj->id; + + if (obj->uapi_object != uapi_object) + continue; + + if (count >= max_count) + break; + + handles[count] = obj_id; + count++; + } + spin_unlock_irq(&ufile->uobjects_lock); + + *total = count; + return handles; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)( + struct uverbs_attr_bundle *attrs) +{ + const struct uverbs_api_object *uapi_object; + ssize_t out_len; + u64 total = 0; + u16 object_id; + u32 *handles; + int ret; + + out_len = uverbs_attr_get_len(attrs, UVERBS_ATTR_INFO_HANDLES_LIST); + if (out_len <= 0 || (out_len % sizeof(u32) != 0)) + return -EINVAL; + + ret = uverbs_get_const(&object_id, attrs, UVERBS_ATTR_INFO_OBJECT_ID); + if (ret) + return ret; + + uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id); + if (!uapi_object) + return -EINVAL; + + handles = gather_objects_handle(attrs->ufile, uapi_object, attrs, + out_len, &total); + if (IS_ERR(handles)) + return PTR_ERR(handles); + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_HANDLES_LIST, handles, + sizeof(u32) * total); + if (ret) + goto err; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_TOTAL_HANDLES, &total, + sizeof(total)); +err: + return ret; +} + +void copy_port_attr_to_resp(struct ib_port_attr *attr, + struct ib_uverbs_query_port_resp *resp, + struct ib_device *ib_dev, u8 port_num) +{ + resp->state = attr->state; + resp->max_mtu = attr->max_mtu; + resp->active_mtu = attr->active_mtu; + resp->gid_tbl_len = attr->gid_tbl_len; + resp->port_cap_flags = make_port_cap_flags(attr); + resp->max_msg_sz = attr->max_msg_sz; + resp->bad_pkey_cntr = attr->bad_pkey_cntr; + resp->qkey_viol_cntr = attr->qkey_viol_cntr; + resp->pkey_tbl_len = attr->pkey_tbl_len; + + if (attr->grh_required) + resp->flags |= IB_UVERBS_QPF_GRH_REQUIRED; + + resp->lid = (u16)attr->lid; + resp->sm_lid = (u16)attr->sm_lid; + resp->lmc = attr->lmc; + resp->max_vl_num = attr->max_vl_num; + resp->sm_sl = attr->sm_sl; + 
resp->subnet_timeout = attr->subnet_timeout; + resp->init_type_reply = attr->init_type_reply; + resp->active_width = attr->active_width; + resp->active_speed = attr->active_speed; + resp->phys_state = attr->phys_state; + resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num); +} + +static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_device *ib_dev; + struct ib_port_attr attr = {}; + struct ib_uverbs_query_port_resp_ex resp = {}; + struct ib_ucontext *ucontext; + int ret; + u8 port_num; + + ucontext = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; + + /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */ + if (!ib_dev->query_port) + return -EOPNOTSUPP; + + ret = uverbs_get_const(&port_num, attrs, + UVERBS_ATTR_QUERY_PORT_PORT_NUM); + if (ret) + return ret; + + ret = ib_query_port(ib_dev, port_num, &attr); + if (ret) + return ret; + + copy_port_attr_to_resp(&attr, &resp.legacy_resp, ib_dev, port_num); + resp.port_cap_flags2 = 0; + + return uverbs_copy_to_struct_or_zero(attrs, UVERBS_ATTR_QUERY_PORT_RESP, + &resp, sizeof(resp)); +} + +static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)( + struct uverbs_attr_bundle *attrs) +{ + u32 num_comp = attrs->ufile->device->num_comp_vectors; + u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS; + int ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, + &num_comp, sizeof(num_comp)); + if (IS_UVERBS_COPY_ERR(ret)) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT, + &core_support, sizeof(core_support)); + if (IS_UVERBS_COPY_ERR(ret)) + return ret; + + ret = ib_alloc_ucontext(attrs); + if (ret) + return ret; + ret = ib_init_ucontext(attrs); + if (ret) { + kfree(attrs->context); + attrs->context = NULL; + return ret; + } + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_GET_CONTEXT, + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, + UVERBS_ATTR_TYPE(u32), UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT, + UVERBS_ATTR_TYPE(u64), UA_OPTIONAL), + UVERBS_ATTR_UHW()); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_INFO_HANDLES, + /* Also includes any device specific object ids */ + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_INFO_OBJECT_ID, + enum uverbs_default_objects, UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_TOTAL_HANDLES, + UVERBS_ATTR_TYPE(u32), UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_HANDLES_LIST, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_QUERY_PORT, + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_PORT_PORT_NUM, u8, UA_MANDATORY), + UVERBS_ATTR_PTR_OUT( + UVERBS_ATTR_QUERY_PORT_RESP, + UVERBS_ATTR_STRUCT(struct ib_uverbs_query_port_resp_ex, + reserved), + UA_MANDATORY)); + +DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE, + &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT), + &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE), + &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES), + &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT)); + +const struct uapi_definition uverbs_def_obj_device[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE), + {}, +}; diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_dm.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_dm.c new file mode 100644 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_dm.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rdma_core.h" +#include "uverbs.h" +#include + +static int uverbs_free_dm(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct ib_dm *dm = uobject->object; + int ret; + + ret = ib_destroy_usecnt(&dm->usecnt, why, uobject); + if (ret) + return ret; + + return dm->device->dealloc_dm(dm, attrs); +} + +static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_dm_alloc_attr attr = {}; + struct ib_uobject *uobj = + uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE) + ->obj_attr.uobject; + struct ib_device *ib_dev = attrs->context->device; + struct ib_dm *dm; + int ret; + + if (!ib_dev->alloc_dm) + return -EOPNOTSUPP; + + ret = uverbs_copy_from(&attr.length, attrs, + UVERBS_ATTR_ALLOC_DM_LENGTH); + if (ret) + return ret; + + ret = uverbs_copy_from(&attr.alignment, attrs, + UVERBS_ATTR_ALLOC_DM_ALIGNMENT); + if (ret) + return ret; + + dm = ib_dev->alloc_dm(ib_dev, attrs->context, &attr, attrs); + if (IS_ERR(dm)) + return PTR_ERR(dm); + + dm->device = ib_dev; + dm->length = attr.length; + dm->uobject = uobj; + atomic_set(&dm->usecnt, 0); + + uobj->object = dm; + + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_DM_ALLOC, + UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_DM_FREE, + UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm), + &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC), + &UVERBS_METHOD(UVERBS_METHOD_DM_FREE)); + +const struct uapi_definition uverbs_def_obj_dm[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_dm)), + {} +}; diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_flow_action.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_flow_action.c new file mode 100644 
--- /dev/null +++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_flow_action.c @@ -0,0 +1,449 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rdma_core.h" +#include "uverbs.h" +#include + +static int uverbs_free_flow_action(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct ib_flow_action *action = uobject->object; + int ret; + + ret = ib_destroy_usecnt(&action->usecnt, why, uobject); + if (ret) + return ret; + + return action->device->destroy_flow_action(action); +} + +static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs, + u32 flags, bool is_modify) +{ + u64 verbs_flags = flags; + + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN)) + verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED; + + if (is_modify && uverbs_attr_is_valid(attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) + verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS; + + return verbs_flags; +}; + +static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat) +{ + struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm = + &keymat->keymat.aes_gcm; + + if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) + return -EOPNOTSUPP; + + if (aes_gcm->key_len != 32 && + aes_gcm->key_len != 24 && + aes_gcm->key_len != 16) + return -EINVAL; + + if (aes_gcm->icv_len != 16 && + aes_gcm->icv_len != 8 && + aes_gcm->icv_len != 12) + return -EINVAL; + + return 0; +} + +static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = { + [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm, +}; + +static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay, + bool is_modify) +{ + /* This is used in order to modify an esp flow action with an enabled + * replay protection to a disabled one. This is only supported via + * modify, as in create verb we can simply drop the REPLAY attribute and + * achieve the same thing. + */ + return is_modify ? 
0 : -EINVAL; +} + +static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay, + bool is_modify) +{ + /* Some replay protections could always be enabled without validating + * anything. + */ + return 0; +} + +static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay, + bool is_modify) = { + [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none, + [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok, +}; + +static int parse_esp_ip(enum ib_flow_spec_type proto, + const void __user *val_ptr, + size_t len, union ib_flow_spec *out) +{ + int ret; + const struct ib_uverbs_flow_ipv4_filter ipv4 = { + .src_ip = cpu_to_be32(0xffffffffUL), + .dst_ip = cpu_to_be32(0xffffffffUL), + .proto = 0xff, + .tos = 0xff, + .ttl = 0xff, + .flags = 0xff, + }; + const struct ib_uverbs_flow_ipv6_filter ipv6 = { + .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + .flow_label = cpu_to_be32(0xffffffffUL), + .next_hdr = 0xff, + .traffic_class = 0xff, + .hop_limit = 0xff, + }; + union { + struct ib_uverbs_flow_ipv4_filter ipv4; + struct ib_uverbs_flow_ipv6_filter ipv6; + } user_val = {}; + const void *user_pmask; + size_t val_len; + + /* If the flow IPv4/IPv6 flow specifications are extended, the mask + * should be changed as well. + */ + BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) + + sizeof(ipv4.flags) != sizeof(ipv4)); + BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) + + sizeof(ipv6.reserved) != sizeof(ipv6)); + + switch (proto) { + case IB_FLOW_SPEC_IPV4: + if (len > sizeof(user_val.ipv4) && + !ib_is_buffer_cleared((const u8 *)val_ptr + sizeof(user_val.ipv4), + len - sizeof(user_val.ipv4))) + return -EOPNOTSUPP; + + val_len = min_t(size_t, len, sizeof(user_val.ipv4)); + ret = copy_from_user(&user_val.ipv4, val_ptr, + val_len); + if (ret) + return -EFAULT; + + user_pmask = &ipv4; + break; + case IB_FLOW_SPEC_IPV6: + if (len > sizeof(user_val.ipv6) && + !ib_is_buffer_cleared((const u8 *)val_ptr + sizeof(user_val.ipv6), + len - sizeof(user_val.ipv6))) + return -EOPNOTSUPP; + + val_len = min_t(size_t, len, sizeof(user_val.ipv6)); + ret = copy_from_user(&user_val.ipv6, val_ptr, + val_len); + if (ret) + return -EFAULT; + + user_pmask = &ipv6; + break; + default: + return -EOPNOTSUPP; + } + + return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask, + &user_val, + val_len, out); +} + +static int flow_action_esp_get_encap(struct ib_flow_spec_list *out, + struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_flow_action_esp_encap uverbs_encap; + int ret; + + ret = uverbs_copy_from(&uverbs_encap, attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP); + if (ret) + return ret; + + /* We currently support only one encap */ + if (uverbs_encap.next_ptr) + return -EOPNOTSUPP; + + if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 && + uverbs_encap.type != IB_FLOW_SPEC_IPV6) + return -EOPNOTSUPP; + + return parse_esp_ip(uverbs_encap.type, + u64_to_user_ptr(uverbs_encap.val_ptr), + uverbs_encap.len, + &out->spec); +} + +struct ib_flow_action_esp_attr { + struct ib_flow_action_attrs_esp hdr; + struct ib_flow_action_attrs_esp_keymats keymat; + struct ib_flow_action_attrs_esp_replays replay; + /* We currently support only one spec */ + struct ib_flow_spec_list encap; +}; + +#define ESP_LAST_SUPPORTED_FLAG 
IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW +static int parse_flow_action_esp(struct ib_device *ib_dev, + struct uverbs_attr_bundle *attrs, + struct ib_flow_action_esp_attr *esp_attr, + bool is_modify) +{ + struct ib_uverbs_flow_action_esp uverbs_esp = {}; + int ret; + + /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ + ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_ESN); + if (IS_UVERBS_COPY_ERR(ret)) + return ret; + + /* This can be called from FLOW_ACTION_ESP_MODIFY where + * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional + */ + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) { + ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS); + if (ret) + return ret; + + if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1)) + return -EOPNOTSUPP; + + esp_attr->hdr.spi = uverbs_esp.spi; + esp_attr->hdr.seq = uverbs_esp.seq; + esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad; + esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts; + } + esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags, + is_modify); + + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) { + esp_attr->keymat.protocol = + uverbs_attr_get_enum_id(attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT); + ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat, + attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT); + if (ret) + return ret; + + ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat); + if (ret) + return ret; + + esp_attr->hdr.keymat = &esp_attr->keymat; + } + + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) { + esp_attr->replay.protocol = + uverbs_attr_get_enum_id(attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY); + + ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay, + attrs, + UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY); + if (ret) + return ret; + + ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay, + is_modify); + if (ret) + return ret; + + esp_attr->hdr.replay = &esp_attr->replay; + } + + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) { + ret = flow_action_esp_get_encap(&esp_attr->encap, attrs); + if (ret) + return ret; + + esp_attr->hdr.encap = &esp_attr->encap; + } + + return 0; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); + struct ib_device *ib_dev = attrs->context->device; + int ret; + struct ib_flow_action *action; + struct ib_flow_action_esp_attr esp_attr = {}; + + if (!ib_dev->create_flow_action_esp) + return -EOPNOTSUPP; + + ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false); + if (ret) + return ret; + + /* No need to check as this attribute is marked as MANDATORY */ + action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, + attrs); + if (IS_ERR(action)) + return PTR_ERR(action); + + uverbs_flow_action_fill_action(action, uobj, ib_dev, + IB_FLOW_ACTION_ESP); + + return 0; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE); + struct ib_flow_action *action = uobj->object; + int ret; + struct ib_flow_action_esp_attr esp_attr = {}; + + if (!action->device->modify_flow_action_esp) + return -EOPNOTSUPP; + + ret = 
parse_flow_action_esp(action->device, attrs, &esp_attr, true); + if (ret) + return ret; + + if (action->type != IB_FLOW_ACTION_ESP) + return -EINVAL; + + return action->device->modify_flow_action_esp(action, + &esp_attr.hdr, + attrs); +} + +static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { + [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_STRUCT( + struct ib_uverbs_flow_action_esp_keymat_aes_gcm, + aes_key), + }, +}; + +static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { + [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, + size), + }, +}; + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, + hard_limit_pkts), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, + UVERBS_ATTR_TYPE(__u32), + UA_OPTIONAL), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + uverbs_flow_action_esp_keymat, + UA_MANDATORY), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + uverbs_flow_action_esp_replay, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN( + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, + UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, + UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_WRITE, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, + hard_limit_pkts), + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, + UVERBS_ATTR_TYPE(__u32), + UA_OPTIONAL), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + uverbs_flow_action_esp_keymat, + UA_OPTIONAL), + UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + uverbs_flow_action_esp_replay, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN( + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, + UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_FLOW_ACTION_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); + +const struct uapi_definition uverbs_def_obj_flow_action[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + UVERBS_OBJECT_FLOW_ACTION, + UAPI_DEF_OBJ_NEEDS_FN(destroy_flow_action)), + {} +}; diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_mr.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_mr.c new file mode 100644 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_std_types_mr.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rdma_core.h" +#include "uverbs.h" +#include + +static int uverbs_free_mr(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + return ib_dereg_mr_user((struct ib_mr *)uobject->object, + &attrs->driver_udata); +} + +static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_ADVISE_MR_PD_HANDLE); + enum ib_uverbs_advise_mr_advice advice; + struct ib_device *ib_dev = pd->device; + const struct ib_sge *sg_list; + int num_sge; + u32 flags; + int ret; + + /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. 
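+ * A method-scope analogue of UAPI_DEF_OBJ_NEEDS_FN (compare the
+ * UAPI_SCOPE_METHOD handling in uapi_disable_elm()) could turn this
+ * run-time check into a parse-time disable when the driver does not
+ * provide ->advise_mr.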
*/ + if (!ib_dev->advise_mr) + return -EOPNOTSUPP; + + ret = uverbs_get_const(&advice, attrs, UVERBS_ATTR_ADVISE_MR_ADVICE); + if (ret) + return ret; + + ret = uverbs_get_flags32(&flags, attrs, UVERBS_ATTR_ADVISE_MR_FLAGS, + IB_UVERBS_ADVISE_MR_FLAG_FLUSH); + if (ret) + return ret; + + num_sge = uverbs_attr_ptr_get_array_size( + attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge)); + if (num_sge < 0) + return num_sge; + + sg_list = uverbs_attr_get_alloced_ptr(attrs, + UVERBS_ATTR_ADVISE_MR_SGE_LIST); + return ib_dev->advise_mr(pd, advice, flags, sg_list, num_sge, + attrs); +} + +static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_dm_mr_attr attr = {}; + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE); + struct ib_dm *dm = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE); + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE); + struct ib_device *ib_dev = pd->device; + + struct ib_mr *mr; + int ret; + + if (!ib_dev->reg_dm_mr) + return -EOPNOTSUPP; + + ret = uverbs_copy_from(&attr.offset, attrs, UVERBS_ATTR_REG_DM_MR_OFFSET); + if (ret) + return ret; + + ret = uverbs_copy_from(&attr.length, attrs, + UVERBS_ATTR_REG_DM_MR_LENGTH); + if (ret) + return ret; + + ret = uverbs_get_flags32(&attr.access_flags, attrs, + UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + IB_ACCESS_SUPPORTED); + if (ret) + return ret; + + if (!(attr.access_flags & IB_ZERO_BASED)) + return -EINVAL; + + ret = ib_check_mr_access(attr.access_flags); + if (ret) + return ret; + + if (attr.offset > dm->length || attr.length > dm->length || + attr.length > dm->length - attr.offset) + return -EINVAL; + + mr = pd->device->reg_dm_mr(pd, dm, &attr, attrs); + if (IS_ERR(mr)) + return PTR_ERR(mr); + + mr->device = pd->device; + mr->pd = pd; + mr->type = IB_MR_TYPE_DM; + mr->dm = dm; + mr->uobject = uobj; + atomic_inc(&pd->usecnt); + atomic_inc(&dm->usecnt); + + uobj->object = mr; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey, + sizeof(mr->lkey)); + if (ret) + goto err_dereg; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY, + &mr->rkey, sizeof(mr->rkey)); + if (ret) + goto err_dereg; + + return 0; + +err_dereg: + ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); + + return ret; +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_ADVISE_MR, + UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_ADVISE_MR_ADVICE, + enum ib_uverbs_advise_mr_advice, + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_ADVISE_MR_FLAGS, + enum ib_uverbs_advise_mr_flag, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ADVISE_MR_SGE_LIST, + UVERBS_ATTR_MIN_SIZE(sizeof(struct ib_uverbs_sge)), + UA_MANDATORY, + UA_ALLOC_AND_COPY)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_DM_MR_REG, + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + enum ib_access_flags), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, + UVERBS_OBJECT_DM, + UVERBS_ACCESS_READ, + UA_MANDATORY), + 
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_MR_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + UVERBS_OBJECT_MR, + UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), + &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG), + &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY), + &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR)); + +const struct uapi_definition uverbs_def_obj_mr[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR, + UAPI_DEF_OBJ_NEEDS_FN(dereg_mr)), + {} +}; diff --git a/sys/ofed/drivers/infiniband/core/ib_uverbs_uapi.c b/sys/ofed/drivers/infiniband/core/ib_uverbs_uapi.c new file mode 100644 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/ib_uverbs_uapi.c @@ -0,0 +1,731 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + */ +#include +#include +#include +#include "rdma_core.h" +#include "uverbs.h" + +static int ib_uverbs_notsupp(struct uverbs_attr_bundle *attrs) +{ + return -EOPNOTSUPP; +} + +static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size) +{ + void *elm; + int rc; + + if (key == UVERBS_API_KEY_ERR) + return ERR_PTR(-EOVERFLOW); + + elm = kzalloc(alloc_size, GFP_KERNEL); + if (!elm) + return ERR_PTR(-ENOMEM); + rc = radix_tree_insert(&uapi->radix, key, elm); + if (rc) { + kfree(elm); + return ERR_PTR(rc); + } + + return elm; +} + +static void *uapi_add_get_elm(struct uverbs_api *uapi, u32 key, + size_t alloc_size, bool *exists) +{ + void *elm; + + elm = uapi_add_elm(uapi, key, alloc_size); + if (!IS_ERR(elm)) { + *exists = false; + return elm; + } + + if (elm != ERR_PTR(-EEXIST)) + return elm; + + elm = radix_tree_lookup(&uapi->radix, key); + if (WARN_ON(!elm)) + return ERR_PTR(-EINVAL); + *exists = true; + return elm; +} + +static int uapi_create_write(struct uverbs_api *uapi, + struct ib_device *ibdev, + const struct uapi_definition *def, + u32 obj_key, + u32 *cur_method_key) +{ + struct uverbs_api_write_method *method_elm; + u32 method_key = obj_key; + bool exists; + + if (def->write.is_ex) + method_key |= uapi_key_write_ex_method(def->write.command_num); + else + method_key |= uapi_key_write_method(def->write.command_num); + + method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm), + &exists); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); + + if (WARN_ON(exists && (def->write.is_ex != method_elm->is_ex))) + return -EINVAL; + + method_elm->is_ex = def->write.is_ex; + method_elm->handler = def->func_write; + if (def->write.is_ex) + method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask & + BIT_ULL(def->write.command_num)); + else + method_elm->disabled = !(ibdev->uverbs_cmd_mask & + BIT_ULL(def->write.command_num)); + + if (!def->write.is_ex && def->func_write) { + method_elm->has_udata = def->write.has_udata; + method_elm->has_resp = def->write.has_resp; + method_elm->req_size = def->write.req_size; + method_elm->resp_size = def->write.resp_size; + } + + *cur_method_key = method_key; + return 0; +} + +static int uapi_merge_method(struct uverbs_api *uapi, + struct uverbs_api_object *obj_elm, u32 obj_key, + const struct uverbs_method_def *method, + bool is_driver) +{ + u32 method_key = obj_key | uapi_key_ioctl_method(method->id); + struct uverbs_api_ioctl_method *method_elm; 
+ unsigned int i; + bool exists; + + if (!method->attrs) + return 0; + + method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm), + &exists); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); + if (exists) { + /* + * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE + */ + if (WARN_ON(method->handler)) + return -EINVAL; + } else { + WARN_ON(!method->handler); + rcu_assign_pointer(method_elm->handler, method->handler); + if (method->handler != uverbs_destroy_def_handler) + method_elm->driver_method = is_driver; + } + + for (i = 0; i != method->num_attrs; i++) { + const struct uverbs_attr_def *attr = (*method->attrs)[i]; + struct uverbs_api_attr *attr_slot; + + if (!attr) + continue; + + /* + * ENUM_IN contains the 'ids' pointer to the driver's .rodata, + * so if it is specified by a driver then it always makes this + * into a driver method. + */ + if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN) + method_elm->driver_method |= is_driver; + + /* + * Like other uobject based things we only support a single + * uobject being NEW'd or DESTROY'd + */ + if (attr->attr.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) { + u8 access = attr->attr.u2.objs_arr.access; + + if (WARN_ON(access == UVERBS_ACCESS_NEW || + access == UVERBS_ACCESS_DESTROY)) + return -EINVAL; + } + + attr_slot = + uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id), + sizeof(*attr_slot)); + /* Attributes are not allowed to be modified by drivers */ + if (IS_ERR(attr_slot)) + return PTR_ERR(attr_slot); + + attr_slot->spec = attr->attr; + } + + return 0; +} + +static int uapi_merge_obj_tree(struct uverbs_api *uapi, + const struct uverbs_object_def *obj, + bool is_driver) +{ + struct uverbs_api_object *obj_elm; + unsigned int i; + u32 obj_key; + bool exists; + int rc; + + obj_key = uapi_key_obj(obj->id); + obj_elm = uapi_add_get_elm(uapi, obj_key, sizeof(*obj_elm), &exists); + if (IS_ERR(obj_elm)) + return PTR_ERR(obj_elm); + + if (obj->type_attrs) { + if (WARN_ON(obj_elm->type_attrs)) + return -EINVAL; + + obj_elm->id = obj->id; + obj_elm->type_attrs = obj->type_attrs; + obj_elm->type_class = obj->type_attrs->type_class; + /* + * Today drivers are only permitted to use idr_class and + * fd_class types. We can revoke the IDR types during + * disassociation, and the FD types require the driver to use + * struct file_operations.owner to prevent the driver module + * code from unloading while the file is open. This provides + * enough safety that uverbs_uobject_fd_release() will + * continue to work. Drivers using FD are responsible to + * handle disassociation of the device on their own. 
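+ * Note that uverbs_disassociate_api() also clears each object's
+ * type_attrs pointer at disassociation time, since a driver object's
+ * type_attrs may live in the driver module itself.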
+ */ + if (WARN_ON(is_driver && + obj->type_attrs->type_class != &uverbs_idr_class && + obj->type_attrs->type_class != &uverbs_fd_class)) + return -EINVAL; + } + + if (!obj->methods) + return 0; + + for (i = 0; i != obj->num_methods; i++) { + const struct uverbs_method_def *method = (*obj->methods)[i]; + + if (!method) + continue; + + rc = uapi_merge_method(uapi, obj_elm, obj_key, method, + is_driver); + if (rc) + return rc; + } + + return 0; +} + +static int uapi_disable_elm(struct uverbs_api *uapi, + const struct uapi_definition *def, + u32 obj_key, + u32 method_key) +{ + bool exists; + + if (def->scope == UAPI_SCOPE_OBJECT) { + struct uverbs_api_object *obj_elm; + + obj_elm = uapi_add_get_elm( + uapi, obj_key, sizeof(*obj_elm), &exists); + if (IS_ERR(obj_elm)) + return PTR_ERR(obj_elm); + obj_elm->disabled = 1; + return 0; + } + + if (def->scope == UAPI_SCOPE_METHOD && + uapi_key_is_ioctl_method(method_key)) { + struct uverbs_api_ioctl_method *method_elm; + + method_elm = uapi_add_get_elm(uapi, method_key, + sizeof(*method_elm), &exists); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); + method_elm->disabled = 1; + return 0; + } + + if (def->scope == UAPI_SCOPE_METHOD && + (uapi_key_is_write_method(method_key) || + uapi_key_is_write_ex_method(method_key))) { + struct uverbs_api_write_method *write_elm; + + write_elm = uapi_add_get_elm(uapi, method_key, + sizeof(*write_elm), &exists); + if (IS_ERR(write_elm)) + return PTR_ERR(write_elm); + write_elm->disabled = 1; + return 0; + } + + WARN_ON(true); + return -EINVAL; +} + +static int uapi_merge_def(struct uverbs_api *uapi, struct ib_device *ibdev, + const struct uapi_definition *def_list, + bool is_driver) +{ + const struct uapi_definition *def = def_list; + u32 cur_obj_key = UVERBS_API_KEY_ERR; + u32 cur_method_key = UVERBS_API_KEY_ERR; + bool exists; + int rc; + + if (!def_list) + return 0; + + for (;; def++) { + switch ((enum uapi_definition_kind)def->kind) { + case UAPI_DEF_CHAIN: + rc = uapi_merge_def(uapi, ibdev, def->chain, is_driver); + if (rc) + return rc; + continue; + + case UAPI_DEF_CHAIN_OBJ_TREE: + if (WARN_ON(def->object_start.object_id != + def->chain_obj_tree->id)) + return -EINVAL; + + cur_obj_key = uapi_key_obj(def->object_start.object_id); + rc = uapi_merge_obj_tree(uapi, def->chain_obj_tree, + is_driver); + if (rc) + return rc; + continue; + + case UAPI_DEF_END: + return 0; + + case UAPI_DEF_IS_SUPPORTED_DEV_FN: { + void **ibdev_fn = + (void *)((u8 *)ibdev + def->needs_fn_offset); + + if (*ibdev_fn) + continue; + rc = uapi_disable_elm( + uapi, def, cur_obj_key, cur_method_key); + if (rc) + return rc; + continue; + } + + case UAPI_DEF_IS_SUPPORTED_FUNC: + if (def->func_is_supported(ibdev)) + continue; + rc = uapi_disable_elm( + uapi, def, cur_obj_key, cur_method_key); + if (rc) + return rc; + continue; + + case UAPI_DEF_OBJECT_START: { + struct uverbs_api_object *obj_elm; + + cur_obj_key = uapi_key_obj(def->object_start.object_id); + obj_elm = uapi_add_get_elm(uapi, cur_obj_key, + sizeof(*obj_elm), &exists); + if (IS_ERR(obj_elm)) + return PTR_ERR(obj_elm); + continue; + } + + case UAPI_DEF_WRITE: + rc = uapi_create_write( + uapi, ibdev, def, cur_obj_key, &cur_method_key); + if (rc) + return rc; + continue; + } + WARN_ON(true); + return -EINVAL; + } +} + +static int +uapi_finalize_ioctl_method(struct uverbs_api *uapi, + struct uverbs_api_ioctl_method *method_elm, + u32 method_key) +{ + struct radix_tree_iter iter; + unsigned int num_attrs = 0; + unsigned int max_bkey = 0; + bool single_uobj = false; + void 
__rcu **slot; + + method_elm->destroy_bkey = UVERBS_API_ATTR_BKEY_LEN; + radix_tree_for_each_slot (slot, &uapi->radix, &iter, + uapi_key_attrs_start(method_key)) { + struct uverbs_api_attr *elm = + rcu_dereference_protected(*slot, true); + u32 attr_key = iter.index & UVERBS_API_ATTR_KEY_MASK; + u32 attr_bkey = uapi_bkey_attr(attr_key); + u8 type = elm->spec.type; + + if (uapi_key_attr_to_ioctl_method(iter.index) != + uapi_key_attr_to_ioctl_method(method_key)) + break; + + if (elm->spec.mandatory) + __set_bit(attr_bkey, method_elm->attr_mandatory); + + if (elm->spec.is_udata) + method_elm->has_udata = true; + + if (type == UVERBS_ATTR_TYPE_IDR || + type == UVERBS_ATTR_TYPE_FD) { + u8 access = elm->spec.u.obj.access; + + /* + * Verbs specs may only have one NEW/DESTROY, we don't + * have the infrastructure to abort multiple NEW's or + * cope with multiple DESTROY failure. + */ + if (access == UVERBS_ACCESS_NEW || + access == UVERBS_ACCESS_DESTROY) { + if (WARN_ON(single_uobj)) + return -EINVAL; + + single_uobj = true; + if (WARN_ON(!elm->spec.mandatory)) + return -EINVAL; + } + + if (access == UVERBS_ACCESS_DESTROY) + method_elm->destroy_bkey = attr_bkey; + } + + max_bkey = max(max_bkey, attr_bkey); + num_attrs++; + } + + method_elm->key_bitmap_len = max_bkey + 1; + WARN_ON(method_elm->key_bitmap_len > UVERBS_API_ATTR_BKEY_LEN); + + uapi_compute_bundle_size(method_elm, num_attrs); + return 0; +} + +static int uapi_finalize(struct uverbs_api *uapi) +{ + const struct uverbs_api_write_method **data; + unsigned long max_write_ex = 0; + unsigned long max_write = 0; + struct radix_tree_iter iter; + void __rcu **slot; + int rc; + int i; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + struct uverbs_api_ioctl_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (uapi_key_is_ioctl_method(iter.index)) { + rc = uapi_finalize_ioctl_method(uapi, method_elm, + iter.index); + if (rc) + return rc; + } + + if (uapi_key_is_write_method(iter.index)) + max_write = max(max_write, + iter.index & UVERBS_API_ATTR_KEY_MASK); + if (uapi_key_is_write_ex_method(iter.index)) + max_write_ex = + max(max_write_ex, + iter.index & UVERBS_API_ATTR_KEY_MASK); + } + + uapi->notsupp_method.handler = ib_uverbs_notsupp; + uapi->num_write = max_write + 1; + uapi->num_write_ex = max_write_ex + 1; + data = kmalloc_array(uapi->num_write + uapi->num_write_ex, + sizeof(*uapi->write_methods), GFP_KERNEL); + if (!data) + return -ENOMEM; + for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++) + data[i] = &uapi->notsupp_method; + uapi->write_methods = data; + uapi->write_ex_methods = data + uapi->num_write; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + if (uapi_key_is_write_method(iter.index)) + uapi->write_methods[iter.index & + UVERBS_API_ATTR_KEY_MASK] = + rcu_dereference_protected(*slot, true); + if (uapi_key_is_write_ex_method(iter.index)) + uapi->write_ex_methods[iter.index & + UVERBS_API_ATTR_KEY_MASK] = + rcu_dereference_protected(*slot, true); + } + + return 0; +} + +static void uapi_remove_range(struct uverbs_api *uapi, u32 start, u32 last) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, start) { + if (iter.index > last) + return; + kfree(rcu_dereference_protected(*slot, true)); + radix_tree_iter_delete(&uapi->radix, &iter, slot); + } +} + +static void uapi_remove_object(struct uverbs_api *uapi, u32 obj_key) +{ + uapi_remove_range(uapi, obj_key, + obj_key | UVERBS_API_METHOD_KEY_MASK | + UVERBS_API_ATTR_KEY_MASK); +} + +static void
uapi_remove_method(struct uverbs_api *uapi, u32 method_key) +{ + uapi_remove_range(uapi, method_key, + method_key | UVERBS_API_ATTR_KEY_MASK); +} + + +static u32 uapi_get_obj_id(struct uverbs_attr_spec *spec) +{ + if (spec->type == UVERBS_ATTR_TYPE_IDR || + spec->type == UVERBS_ATTR_TYPE_FD) + return spec->u.obj.obj_type; + if (spec->type == UVERBS_ATTR_TYPE_IDRS_ARRAY) + return spec->u2.objs_arr.obj_type; + return UVERBS_API_KEY_ERR; +} + +static void uapi_key_okay(u32 key) +{ + unsigned int count = 0; + + if (uapi_key_is_object(key)) + count++; + if (uapi_key_is_ioctl_method(key)) + count++; + if (uapi_key_is_write_method(key)) + count++; + if (uapi_key_is_write_ex_method(key)) + count++; + if (uapi_key_is_attr(key)) + count++; + WARN(count != 1, "Bad count %d key=%x", count, key); +} + +static void uapi_finalize_disable(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + u32 starting_key = 0; + bool scan_again = false; + void __rcu **slot; + +again: + radix_tree_for_each_slot (slot, &uapi->radix, &iter, starting_key) { + uapi_key_okay(iter.index); + + if (uapi_key_is_object(iter.index)) { + struct uverbs_api_object *obj_elm = + rcu_dereference_protected(*slot, true); + + if (obj_elm->disabled) { + /* Have to check all the attrs again */ + scan_again = true; + starting_key = iter.index; + uapi_remove_object(uapi, iter.index); + goto again; + } + continue; + } + + if (uapi_key_is_ioctl_method(iter.index)) { + struct uverbs_api_ioctl_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (method_elm->disabled) { + starting_key = iter.index; + uapi_remove_method(uapi, iter.index); + goto again; + } + continue; + } + + if (uapi_key_is_write_method(iter.index) || + uapi_key_is_write_ex_method(iter.index)) { + struct uverbs_api_write_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (method_elm->disabled) { + kfree(method_elm); + radix_tree_iter_delete(&uapi->radix, &iter, slot); + } + continue; + } + + if (uapi_key_is_attr(iter.index)) { + struct uverbs_api_attr *attr_elm = + rcu_dereference_protected(*slot, true); + const struct uverbs_api_object *tmp_obj; + u32 obj_key; + + /* + * If the method has a mandatory object handle + * attribute which relies on an object which is not + * present then the entire method is uncallable. 
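+ * For example, when UVERBS_OBJECT_DM is disabled because the driver
+ * lacks ->dealloc_dm, UVERBS_METHOD_DM_MR_REG is removed as well:
+ * its mandatory UVERBS_ATTR_REG_DM_MR_DM_HANDLE can no longer
+ * resolve to a live object.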
+ */ + if (!attr_elm->spec.mandatory) + continue; + obj_key = uapi_get_obj_id(&attr_elm->spec); + if (obj_key == UVERBS_API_KEY_ERR) + continue; + tmp_obj = uapi_get_object(uapi, obj_key); + if (IS_ERR(tmp_obj)) { + if (PTR_ERR(tmp_obj) == -ENOMSG) + continue; + } else { + if (!tmp_obj->disabled) + continue; + } + + starting_key = iter.index; + uapi_remove_method( + uapi, + iter.index & (UVERBS_API_OBJ_KEY_MASK | + UVERBS_API_METHOD_KEY_MASK)); + goto again; + } + + WARN_ON(false); + } + + if (!scan_again) + return; + scan_again = false; + starting_key = 0; + goto again; +} + +void uverbs_destroy_api(struct uverbs_api *uapi) +{ + if (!uapi) + return; + + uapi_remove_range(uapi, 0, U32_MAX); + kfree(uapi->write_methods); + kfree(uapi); +} + +static const struct uapi_definition uverbs_core_api[] = { + UAPI_DEF_CHAIN(uverbs_def_obj_async_fd), + UAPI_DEF_CHAIN(uverbs_def_obj_counters), + UAPI_DEF_CHAIN(uverbs_def_obj_cq), + UAPI_DEF_CHAIN(uverbs_def_obj_device), + UAPI_DEF_CHAIN(uverbs_def_obj_dm), + UAPI_DEF_CHAIN(uverbs_def_obj_flow_action), + UAPI_DEF_CHAIN(uverbs_def_obj_intf), + UAPI_DEF_CHAIN(uverbs_def_obj_mr), + UAPI_DEF_CHAIN(uverbs_def_write_intf), + {}, +}; + +struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev) +{ + struct uverbs_api *uapi; + int rc; + + uapi = kzalloc(sizeof(*uapi), GFP_KERNEL); + if (!uapi) + return ERR_PTR(-ENOMEM); + + INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL); + uapi->driver_id = ibdev->ops.driver_id; + + rc = uapi_merge_def(uapi, ibdev, uverbs_core_api, false); + if (rc) + goto err; + rc = uapi_merge_def(uapi, ibdev, ibdev->driver_def, true); + if (rc) + goto err; + + uapi_finalize_disable(uapi); + rc = uapi_finalize(uapi); + if (rc) + goto err; + + return uapi; +err: + if (rc != -ENOMEM) + dev_err(&ibdev->dev, + "Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n", + rc); + + uverbs_destroy_api(uapi); + return ERR_PTR(rc); +} + +/* + * The pre version is done before destroying the HW objects, it only blocks + * off method access. All methods that require the ib_dev or the module data + * must test one of these assignments prior to continuing. + */ +void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev) +{ + struct uverbs_api *uapi = uverbs_dev->uapi; + struct radix_tree_iter iter; + void __rcu **slot; + + rcu_assign_pointer(uverbs_dev->ib_dev, NULL); + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + if (uapi_key_is_ioctl_method(iter.index)) { + struct uverbs_api_ioctl_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (method_elm->driver_method) + rcu_assign_pointer(method_elm->handler, NULL); + } + } + + synchronize_srcu(&uverbs_dev->disassociate_srcu); +} + +/* + * Called when a driver disassociates from the ib_uverbs_device. The + * assumption is that the driver module will unload after. Replace everything + * related to the driver with NULL as a safety measure. + */ +void uverbs_disassociate_api(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + if (uapi_key_is_object(iter.index)) { + struct uverbs_api_object *object_elm = + rcu_dereference_protected(*slot, true); + + /* + * Some type_attrs are in the driver module. We don't + * bother to keep track of which since there should be + * no use of this after disassociate. 
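+ * The handlers of driver methods were already fenced off in
+ * uverbs_disassociate_api_pre() before the HW objects were destroyed.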
+ */ + object_elm->type_attrs = NULL; + } else if (uapi_key_is_attr(iter.index)) { + struct uverbs_api_attr *elm = + rcu_dereference_protected(*slot, true); + + if (elm->spec.type == UVERBS_ATTR_TYPE_ENUM_IN) + elm->spec.u2.enum_def.ids = NULL; + } + } +} diff --git a/sys/ofed/drivers/infiniband/core/ib_verbs.c b/sys/ofed/drivers/infiniband/core/ib_verbs.c --- a/sys/ofed/drivers/infiniband/core/ib_verbs.c +++ b/sys/ofed/drivers/infiniband/core/ib_verbs.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -267,10 +268,11 @@ { struct ib_pd *pd; int mr_access_flags = 0; + int ret; - pd = device->alloc_pd(device, NULL, NULL); - if (IS_ERR(pd)) - return pd; + pd = rdma_zalloc_drv_obj(device, ib_pd); + if (!pd) + return ERR_PTR(-ENOMEM); pd->device = device; pd->uobject = NULL; @@ -278,6 +280,12 @@ atomic_set(&pd->usecnt, 0); pd->flags = flags; + ret = device->alloc_pd(pd, NULL); + if (ret) { + kfree(pd); + return ERR_PTR(ret); + } + if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; else @@ -299,6 +307,7 @@ mr->device = pd->device; mr->pd = pd; + mr->type = IB_MR_TYPE_DMA; mr->uobject = NULL; mr->need_inval = false; @@ -316,19 +325,20 @@ EXPORT_SYMBOL(__ib_alloc_pd); /** - * ib_dealloc_pd - Deallocates a protection domain. + * ib_dealloc_pd_user - Deallocates a protection domain. * @pd: The protection domain to deallocate. + * @udata: Valid user data or NULL for kernel object * * It is an error to call this function while any resources in the pd still * exist. The caller is responsible to synchronously destroy them and * guarantee no new allocations will happen. */ -void ib_dealloc_pd(struct ib_pd *pd) +void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) { int ret; if (pd->__internal_mr) { - ret = pd->device->dereg_mr(pd->__internal_mr); + ret = pd->device->dereg_mr(pd->__internal_mr, NULL); WARN_ON(ret); pd->__internal_mr = NULL; } @@ -337,32 +347,98 @@ requires the caller to guarantee we can't race here. */ WARN_ON(atomic_read(&pd->usecnt)); - /* Making delalloc_pd a void return is a WIP, no driver should return - an error here. */ - ret = pd->device->dealloc_pd(pd); - WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); + pd->device->dealloc_pd(pd, udata); + kfree(pd); } -EXPORT_SYMBOL(ib_dealloc_pd); +EXPORT_SYMBOL(ib_dealloc_pd_user); /* Address handles */ -struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +static struct ib_ah *_ib_create_ah(struct ib_pd *pd, + struct ib_ah_attr *ah_attr, + u32 flags, + struct ib_udata *udata) { + struct ib_device *device = pd->device; struct ib_ah *ah; + int ret; - ah = pd->device->create_ah(pd, ah_attr, NULL); + might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); - if (!IS_ERR(ah)) { - ah->device = pd->device; - ah->pd = pd; - ah->uobject = NULL; - atomic_inc(&pd->usecnt); + if (!device->create_ah) + return ERR_PTR(-EOPNOTSUPP); + + ah = rdma_zalloc_drv_obj_gfp( + device, ib_ah, + (flags & RDMA_CREATE_AH_SLEEPABLE) ? GFP_KERNEL : GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + ah->device = device; + ah->pd = pd; + + ret = device->create_ah(ah, ah_attr, flags, udata); + if (ret) { + kfree(ah); + return ERR_PTR(ret); } + atomic_inc(&pd->usecnt); + return ah; +} + +/** + * rdma_create_ah - Creates an address handle for the + * given address vector. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. 
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags). + * + * It returns 0 on success and returns appropriate error code on error. + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + u32 flags) +{ + struct ib_ah *ah; + + ah = _ib_create_ah(pd, ah_attr, flags, NULL); + return ah; } EXPORT_SYMBOL(ib_create_ah); +/** + * ib_create_user_ah - Creates an address handle for the + * given address vector. + * It resolves destination mac address for ah attribute of RoCE type. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * @udata: pointer to user's input output buffer information need by + * provider driver. + * + * It returns a valid address handle pointer on success and + * returns appropriate error code on error. + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *ib_create_user_ah(struct ib_pd *pd, + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) +{ + int err; + + if (rdma_protocol_roce(pd->device, ah_attr->port_num)) { + err = ib_resolve_eth_dmac(pd->device, ah_attr); + if (err) + return ERR_PTR(err); + } + + return _ib_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata); +} +EXPORT_SYMBOL(ib_create_user_ah); + static int ib_get_header_version(const union rdma_network_hdr *hdr) { const struct ip *ip4h = (const struct ip *)&hdr->roce4grh; @@ -589,7 +665,7 @@ if (ret) return ERR_PTR(ret); - return ib_create_ah(pd, &ah_attr); + return ib_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE); } EXPORT_SYMBOL(ib_create_ah_from_wc); @@ -609,19 +685,20 @@ } EXPORT_SYMBOL(ib_query_ah); -int ib_destroy_ah(struct ib_ah *ah) +int ib_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata) { struct ib_pd *pd; - int ret; + + might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE); pd = ah->pd; - ret = ah->device->destroy_ah(ah); - if (!ret) - atomic_dec(&pd->usecnt); + ah->device->destroy_ah(ah, flags); + atomic_dec(&pd->usecnt); - return ret; + kfree(ah); + return 0; } -EXPORT_SYMBOL(ib_destroy_ah); +EXPORT_SYMBOL(ib_destroy_ah_user); /* Shared receive queues */ @@ -629,27 +706,40 @@ struct ib_srq_init_attr *srq_init_attr) { struct ib_srq *srq; + int ret; if (!pd->device->create_srq) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); - srq = pd->device->create_srq(pd, srq_init_attr, NULL); - - if (!IS_ERR(srq)) { - srq->device = pd->device; - srq->pd = pd; - srq->uobject = NULL; - srq->event_handler = srq_init_attr->event_handler; - srq->srq_context = srq_init_attr->srq_context; - srq->srq_type = srq_init_attr->srq_type; - if (srq->srq_type == IB_SRQT_XRC) { - srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; - srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq; - atomic_inc(&srq->ext.xrc.xrcd->usecnt); - atomic_inc(&srq->ext.xrc.cq->usecnt); - } - atomic_inc(&pd->usecnt); - atomic_set(&srq->usecnt, 0); + srq = rdma_zalloc_drv_obj(pd->device, ib_srq); + if (!srq) + return ERR_PTR(-ENOMEM); + + srq->device = pd->device; + srq->pd = pd; + srq->event_handler = srq_init_attr->event_handler; + srq->srq_context = srq_init_attr->srq_context; + srq->srq_type = srq_init_attr->srq_type; + + if (ib_srq_has_cq(srq->srq_type)) { + srq->ext.cq = srq_init_attr->ext.cq; + atomic_inc(&srq->ext.cq->usecnt); + } + if (srq->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; + 
atomic_inc(&srq->ext.xrc.xrcd->usecnt); + } + atomic_inc(&pd->usecnt); + + ret = pd->device->create_srq(srq, srq_init_attr, NULL); + if (ret) { + atomic_dec(&srq->pd->usecnt); + if (srq->srq_type == IB_SRQT_XRC) + atomic_dec(&srq->ext.xrc.xrcd->usecnt); + if (ib_srq_has_cq(srq->srq_type)) + atomic_dec(&srq->ext.cq->usecnt); + kfree(srq); + return ERR_PTR(ret); } return srq; @@ -674,36 +764,23 @@ } EXPORT_SYMBOL(ib_query_srq); -int ib_destroy_srq(struct ib_srq *srq) +int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata) { - struct ib_pd *pd; - enum ib_srq_type srq_type; - struct ib_xrcd *uninitialized_var(xrcd); - struct ib_cq *uninitialized_var(cq); - int ret; - if (atomic_read(&srq->usecnt)) return -EBUSY; - pd = srq->pd; - srq_type = srq->srq_type; - if (srq_type == IB_SRQT_XRC) { - xrcd = srq->ext.xrc.xrcd; - cq = srq->ext.xrc.cq; - } + srq->device->destroy_srq(srq, udata); - ret = srq->device->destroy_srq(srq); - if (!ret) { - atomic_dec(&pd->usecnt); - if (srq_type == IB_SRQT_XRC) { - atomic_dec(&xrcd->usecnt); - atomic_dec(&cq->usecnt); - } - } + atomic_dec(&srq->pd->usecnt); + if (srq->srq_type == IB_SRQT_XRC) + atomic_dec(&srq->ext.xrc.xrcd->usecnt); + if (ib_srq_has_cq(srq->srq_type)) + atomic_dec(&srq->ext.cq->usecnt); + kfree(srq); - return ret; + return 0; } -EXPORT_SYMBOL(ib_destroy_srq); +EXPORT_SYMBOL(ib_destroy_srq_user); /* Queue pairs */ @@ -793,7 +870,7 @@ if (!IS_ERR(qp)) __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); else - real_qp->device->destroy_qp(real_qp); + real_qp->device->destroy_qp(real_qp, NULL); return qp; } @@ -809,7 +886,7 @@ qp_init_attr->cap.max_recv_sge)) return ERR_PTR(-EINVAL); - qp = device->create_qp(pd, qp_init_attr, NULL); + qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL); if (IS_ERR(qp)) return qp; @@ -1243,6 +1320,95 @@ } EXPORT_SYMBOL(ib_resolve_eth_dmac); +static bool is_qp_type_connected(const struct ib_qp *qp) +{ + return (qp->qp_type == IB_QPT_UC || + qp->qp_type == IB_QPT_RC || + qp->qp_type == IB_QPT_XRC_INI || + qp->qp_type == IB_QPT_XRC_TGT); +} + +/** + * IB core internal function to perform QP attributes modification. + */ +static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + int ret; + + if (port < rdma_start_port(qp->device) || + port > rdma_end_port(qp->device)) + return -EINVAL; + + if (attr_mask & IB_QP_ALT_PATH) { + /* + * Today the core code can only handle alternate paths and APM + * for IB. Ban them in roce mode. + */ + if (!(rdma_protocol_ib(qp->device, + attr->alt_ah_attr.port_num) && + rdma_protocol_ib(qp->device, port))) { + ret = -EINVAL; + goto out; + } + } + + /* + * If the user provided the qp_attr then we have to resolve it.
Kernel + * users have to provide already resolved rdma_ah_attr's + */ + if (udata && (attr_mask & IB_QP_AV) && + rdma_protocol_roce(qp->device, port) && + is_qp_type_connected(qp)) { + ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); + if (ret) + goto out; + } + + if (rdma_ib_or_roce(qp->device, port)) { + if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { + dev_warn(&qp->device->dev, + "%s rq_psn overflow, masking to 24 bits\n", + __func__); + attr->rq_psn &= 0xffffff; + } + + if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) { + dev_warn(&qp->device->dev, + " %s sq_psn overflow, masking to 24 bits\n", + __func__); + attr->sq_psn &= 0xffffff; + } + } + + ret = qp->device->modify_qp(qp, attr, attr_mask, udata); + if (ret) + goto out; + + if (attr_mask & IB_QP_PORT) + qp->port = attr->port_num; +out: + return ret; +} + +/** + * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. + * @ib_qp: The QP to modify. + * @attr: On input, specifies the QP attributes to modify. On output, + * the current values of selected QP attributes are returned. + * @attr_mask: A bit-mask used to specify which attributes of the QP + * are being modified. + * @udata: pointer to user's input output buffer information + * are being modified. + * It returns 0 on success and returns appropriate error code on error. + */ +int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata); +} +EXPORT_SYMBOL(ib_modify_qp_with_udata); int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, @@ -1319,7 +1485,7 @@ return 0; } -int ib_destroy_qp(struct ib_qp *qp) +int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata) { struct ib_pd *pd; struct ib_cq *scq, *rcq; @@ -1339,7 +1505,7 @@ srq = qp->srq; ind_tbl = qp->rwq_ind_tbl; - ret = qp->device->destroy_qp(qp); + ret = qp->device->destroy_qp(qp, udata); if (!ret) { if (pd) atomic_dec(&pd->usecnt); @@ -1355,32 +1521,40 @@ return ret; } -EXPORT_SYMBOL(ib_destroy_qp); +EXPORT_SYMBOL(ib_destroy_qp_user); /* Completion queues */ -struct ib_cq *ib_create_cq(struct ib_device *device, - ib_comp_handler comp_handler, - void (*event_handler)(struct ib_event *, void *), - void *cq_context, - const struct ib_cq_init_attr *cq_attr) +struct ib_cq *__ib_create_cq(struct ib_device *device, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, + const struct ib_cq_init_attr *cq_attr, + const char *caller) { struct ib_cq *cq; + int ret; - cq = device->create_cq(device, cq_attr, NULL, NULL); + cq = rdma_zalloc_drv_obj(device, ib_cq); + if (!cq) + return ERR_PTR(-ENOMEM); + + cq->device = device; + cq->uobject = NULL; + cq->comp_handler = comp_handler; + cq->event_handler = event_handler; + cq->cq_context = cq_context; + atomic_set(&cq->usecnt, 0); - if (!IS_ERR(cq)) { - cq->device = device; - cq->uobject = NULL; - cq->comp_handler = comp_handler; - cq->event_handler = event_handler; - cq->cq_context = cq_context; - atomic_set(&cq->usecnt, 0); + ret = device->create_cq(cq, cq_attr, NULL); + if (ret) { + kfree(cq); + return ERR_PTR(ret); } return cq; } -EXPORT_SYMBOL(ib_create_cq); +EXPORT_SYMBOL(__ib_create_cq); int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { @@ -1389,14 +1563,16 @@ } EXPORT_SYMBOL(ib_modify_cq); -int ib_destroy_cq(struct ib_cq *cq) +int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata) { if (atomic_read(&cq->usecnt)) return 
-EBUSY; - return cq->device->destroy_cq(cq); + cq->device->destroy_cq(cq, udata); + kfree(cq); + return 0; } -EXPORT_SYMBOL(ib_destroy_cq); +EXPORT_SYMBOL(ib_destroy_cq_user); int ib_resize_cq(struct ib_cq *cq, int cqe) { @@ -1407,24 +1583,31 @@ /* Memory regions */ -int ib_dereg_mr(struct ib_mr *mr) +int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) { struct ib_pd *pd = mr->pd; + struct ib_dm *dm = mr->dm; + struct ib_sig_attrs *sig_attrs = mr->sig_attrs; int ret; - ret = mr->device->dereg_mr(mr); - if (!ret) + ret = mr->device->dereg_mr(mr, udata); + if (!ret) { atomic_dec(&pd->usecnt); + if (dm) + atomic_dec(&dm->usecnt); + kfree(sig_attrs); + } return ret; } -EXPORT_SYMBOL(ib_dereg_mr); +EXPORT_SYMBOL(ib_dereg_mr_user); /** - * ib_alloc_mr() - Allocates a memory region + * ib_alloc_mr_user() - Allocates a memory region * @pd: protection domain associated with the region * @mr_type: memory region type * @max_num_sg: maximum sg entries available for registration. + * @udata: user data or null for kernel objects * * Notes: * Memory registeration page/sg lists must not exceed max_num_sg. @@ -1432,27 +1615,38 @@ * max_num_sg * used_page_size. * */ -struct ib_mr *ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct ib_mr *mr; - if (!pd->device->alloc_mr) - return ERR_PTR(-ENOSYS); + if (!pd->device->alloc_mr) { + mr = ERR_PTR(-EOPNOTSUPP); + goto out; + } + + if (mr_type == IB_MR_TYPE_INTEGRITY) { + WARN_ON_ONCE(1); + mr = ERR_PTR(-EINVAL); + goto out; + } - mr = pd->device->alloc_mr(pd, mr_type, max_num_sg); + mr = pd->device->alloc_mr(pd, mr_type, max_num_sg, udata); if (!IS_ERR(mr)) { mr->device = pd->device; mr->pd = pd; + mr->dm = NULL; mr->uobject = NULL; atomic_inc(&pd->usecnt); mr->need_inval = false; + mr->type = mr_type; + mr->sig_attrs = NULL; } +out: return mr; } -EXPORT_SYMBOL(ib_alloc_mr); +EXPORT_SYMBOL(ib_alloc_mr_user); /* "Fast" memory regions */ @@ -1578,14 +1772,14 @@ } EXPORT_SYMBOL(ib_detach_mcast); -struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device) +struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) { struct ib_xrcd *xrcd; if (!device->alloc_xrcd) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); - xrcd = device->alloc_xrcd(device, NULL, NULL); + xrcd = device->alloc_xrcd(device, NULL); if (!IS_ERR(xrcd)) { xrcd->device = device; xrcd->inode = NULL; @@ -1596,9 +1790,9 @@ return xrcd; } -EXPORT_SYMBOL(ib_alloc_xrcd); +EXPORT_SYMBOL(__ib_alloc_xrcd); -int ib_dealloc_xrcd(struct ib_xrcd *xrcd) +int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct ib_qp *qp; int ret; @@ -1612,8 +1806,9 @@ if (ret) return ret; } + mutex_destroy(&xrcd->tgt_qp_mutex); - return xrcd->device->dealloc_xrcd(xrcd); + return xrcd->device->dealloc_xrcd(xrcd, udata); } EXPORT_SYMBOL(ib_dealloc_xrcd); @@ -1657,24 +1852,23 @@ EXPORT_SYMBOL(ib_create_wq); /** - * ib_destroy_wq - Destroys the specified WQ. + * ib_destroy_wq - Destroys the specified user WQ. * @wq: The WQ to destroy. 
+ * @udata: Valid user data */ -int ib_destroy_wq(struct ib_wq *wq) +int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { - int err; struct ib_cq *cq = wq->cq; struct ib_pd *pd = wq->pd; if (atomic_read(&wq->usecnt)) return -EBUSY; - err = wq->device->destroy_wq(wq); - if (!err) { - atomic_dec(&pd->usecnt); - atomic_dec(&cq->usecnt); - } - return err; + wq->device->destroy_wq(wq, udata); + atomic_dec(&pd->usecnt); + atomic_dec(&cq->usecnt); + + return 0; } EXPORT_SYMBOL(ib_destroy_wq); @@ -1761,33 +1955,6 @@ } EXPORT_SYMBOL(ib_destroy_rwq_ind_table); -struct ib_flow *ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, - int domain) -{ - struct ib_flow *flow_id; - if (!qp->device->create_flow) - return ERR_PTR(-ENOSYS); - - flow_id = qp->device->create_flow(qp, flow_attr, domain); - if (!IS_ERR(flow_id)) - atomic_inc(&qp->usecnt); - return flow_id; -} -EXPORT_SYMBOL(ib_create_flow); - -int ib_destroy_flow(struct ib_flow *flow_id) -{ - int err; - struct ib_qp *qp = flow_id->qp; - - err = qp->device->destroy_flow(flow_id); - if (!err) - atomic_dec(&qp->usecnt); - return err; -} -EXPORT_SYMBOL(ib_destroy_flow); - int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) { diff --git a/sys/ofed/drivers/infiniband/core/rdma_core.h b/sys/ofed/drivers/infiniband/core/rdma_core.h new file mode 100644 --- /dev/null +++ b/sys/ofed/drivers/infiniband/core/rdma_core.h @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RDMA_CORE_H +#define RDMA_CORE_H + +#include +#include +#include +#include +#include +#include + +struct ib_uverbs_device; + +void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason); + +int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); + +/* + * Get an ib_uobject that corresponds to the given id from ufile, assuming + * the object is from the given type. 
Lock it to the required access when + * applicable. + * This function could create (access == NEW), destroy (access == DESTROY) + * or unlock (access == READ || access == WRITE) objects if required. + * The action will be finalized only when uverbs_finalize_object or + * uverbs_finalize_objects are called. + */ +struct ib_uobject * +uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, + s64 id, struct uverbs_attr_bundle *attrs); + +void uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, bool commit, + struct uverbs_attr_bundle *attrs); + +int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); + +void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile); +void release_ufile_idr_uobject(struct ib_uverbs_file *ufile); + +struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs); + +/* + * This is the runtime description of the uverbs API, used by the syscall + * machinery to validate and dispatch calls. + */ + +/* + * Depending on ID the slot pointer in the radix tree points at one of these + * structs. + */ + +struct uverbs_api_ioctl_method { + int(__rcu *handler)(struct uverbs_attr_bundle *attrs); + DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN); + u16 bundle_size; + u8 use_stack:1; + u8 driver_method:1; + u8 disabled:1; + u8 has_udata:1; + u8 key_bitmap_len; + u8 destroy_bkey; +}; + +struct uverbs_api_write_method { + int (*handler)(struct uverbs_attr_bundle *attrs); + u8 disabled:1; + u8 is_ex:1; + u8 has_udata:1; + u8 has_resp:1; + u8 req_size; + u8 resp_size; +}; + +struct uverbs_api_attr { + struct uverbs_attr_spec spec; +}; + +struct uverbs_api { + /* radix tree contains struct uverbs_api_* pointers */ + struct radix_tree_root radix; + enum rdma_driver_id driver_id; + + unsigned int num_write; + unsigned int num_write_ex; + struct uverbs_api_write_method notsupp_method; + const struct uverbs_api_write_method **write_methods; + const struct uverbs_api_write_method **write_ex_methods; +}; + +/* + * Get an uverbs_api_object that corresponds to the given object_id. + * Note: + * -ENOMSG means that any object is allowed to match during lookup. 
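+ * Lookups with UVERBS_IDR_ANY_OBJECT return ERR_PTR(-ENOMSG) below, and
+ * callers such as uapi_finalize_disable() treat that value as "no
+ * specific object required" rather than as a failure.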
+ */ +static inline const struct uverbs_api_object * +uapi_get_object(struct uverbs_api *uapi, u16 object_id) +{ + const struct uverbs_api_object *res; + + if (object_id == UVERBS_IDR_ANY_OBJECT) + return ERR_PTR(-ENOMSG); + + res = radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id)); + if (!res) + return ERR_PTR(-ENOENT); + + return res; +} + +char *uapi_key_format(char *S, unsigned int key); +struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev); +void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev); +void uverbs_disassociate_api(struct uverbs_api *uapi); +void uverbs_destroy_api(struct uverbs_api *uapi); +void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, + unsigned int num_attrs); +void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile); + +extern const struct uapi_definition uverbs_def_obj_async_fd[]; +extern const struct uapi_definition uverbs_def_obj_counters[]; +extern const struct uapi_definition uverbs_def_obj_cq[]; +extern const struct uapi_definition uverbs_def_obj_device[]; +extern const struct uapi_definition uverbs_def_obj_dm[]; +extern const struct uapi_definition uverbs_def_obj_flow_action[]; +extern const struct uapi_definition uverbs_def_obj_intf[]; +extern const struct uapi_definition uverbs_def_obj_mr[]; +extern const struct uapi_definition uverbs_def_write_intf[]; + +static inline const struct uverbs_api_write_method * +uapi_get_method(const struct uverbs_api *uapi, u32 command) +{ + u32 cmd_idx = command & IB_USER_VERBS_CMD_COMMAND_MASK; + + if (command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED | + IB_USER_VERBS_CMD_COMMAND_MASK)) + return ERR_PTR(-EINVAL); + + if (command & IB_USER_VERBS_CMD_FLAG_EXTENDED) { + if (cmd_idx >= uapi->num_write_ex) + return ERR_PTR(-EOPNOTSUPP); + return uapi->write_ex_methods[cmd_idx]; + } + + if (cmd_idx >= uapi->num_write) + return ERR_PTR(-EOPNOTSUPP); + return uapi->write_methods[cmd_idx]; +} + +void uverbs_fill_udata(struct uverbs_attr_bundle *bundle, + struct ib_udata *udata, unsigned int attr_in, + unsigned int attr_out); + +#endif /* RDMA_CORE_H */ diff --git a/sys/ofed/drivers/infiniband/core/uverbs.h b/sys/ofed/drivers/infiniband/core/uverbs.h --- a/sys/ofed/drivers/infiniband/core/uverbs.h +++ b/sys/ofed/drivers/infiniband/core/uverbs.h @@ -49,10 +49,15 @@ #include #include #include +#include #include #include #include +#include + +#define UVERBS_MODULE_NAME ib_uverbs +#include static inline void ib_uverbs_init_udata(struct ib_udata *udata, @@ -90,53 +95,76 @@ * an asynchronous event queue file is created and released when the * event file is closed. * - * struct ib_uverbs_event_file: One reference is held by the VFS and - * released when the file is closed. For asynchronous event files, - * another reference is held by the corresponding main context file - * and released when that file is closed. For completion event files, - * a reference is taken when a CQ is created that uses the file, and - * released when the CQ is destroyed. + * struct ib_uverbs_event_queue: Base structure for + * struct ib_uverbs_async_event_file and struct ib_uverbs_completion_event_file. + * One reference is held by the VFS and released when the file is closed. + * For asynchronous event files, another reference is held by the corresponding + * main context file and released when that file is closed. For completion + * event files, a reference is taken when a CQ is created that uses the file, + * and released when the CQ is destroyed. 
 */
 struct ib_uverbs_device {
 	atomic_t refcount;
-	int num_comp_vectors;
+	u32 num_comp_vectors;
 	struct completion comp;
-	struct device *dev;
+	struct device dev;
 	struct ib_device __rcu *ib_dev;
 	int devnum;
 	struct cdev cdev;
 	struct rb_root xrcd_tree;
 	struct mutex xrcd_tree_mutex;
-	struct kobject kobj;
 	struct srcu_struct disassociate_srcu;
 	struct mutex lists_mutex; /* protect lists */
 	struct list_head uverbs_file_list;
-	struct list_head uverbs_events_file_list;
+	struct uverbs_api *uapi;
 };
-struct ib_uverbs_event_file {
-	struct kref ref;
-	int is_async;
-	struct ib_uverbs_file *uverbs_file;
+struct ib_uverbs_event_queue {
 	spinlock_t lock;
 	int is_closed;
 	wait_queue_head_t poll_wait;
 	struct fasync_struct *async_queue;
 	struct list_head event_list;
-	struct list_head list;
+};
+
+struct ib_uverbs_async_event_file {
+	struct ib_uobject uobj;
+	struct ib_uverbs_event_queue ev_queue;
+	struct ib_event_handler event_handler;
+};
+
+struct ib_uverbs_completion_event_file {
+	struct ib_uobject uobj;
+	struct ib_uverbs_event_queue ev_queue;
 };
 struct ib_uverbs_file {
 	struct kref ref;
-	struct mutex mutex;
-	struct mutex cleanup_mutex; /* protect cleanup */
 	struct ib_uverbs_device *device;
+	struct mutex ucontext_lock;
+	/*
+	 * ucontext must be accessed via ib_uverbs_get_ucontext() or with
+	 * ucontext_lock held
+	 */
 	struct ib_ucontext *ucontext;
-	struct ib_event_handler event_handler;
-	struct ib_uverbs_event_file *async_file;
+	struct ib_uverbs_async_event_file *async_file;
 	struct list_head list;
-	int is_closed;
+
+	/*
+	 * To access the uobjects list hw_destroy_rwsem must be held for write
+	 * OR hw_destroy_rwsem held for read AND uobjects_lock held.
+	 * hw_destroy_rwsem should be held across any destruction of the HW
+	 * object of an associated uobject.
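+	 * (For example, the core holds it for read around a single object's
+	 * HW destroy and for write when tearing down an entire ufile.)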
+ */ + struct rw_semaphore hw_destroy_rwsem; + spinlock_t uobjects_lock; + struct list_head uobjects; + + struct mutex umap_lock; + struct list_head umaps; + + struct xarray idr; }; struct ib_uverbs_event { @@ -157,6 +185,7 @@ struct ib_uevent_object { struct ib_uobject uobject; + /* List member for ib_uverbs_async_event_file list */ struct list_head event_list; u32 events_reported; }; @@ -184,51 +213,40 @@ }; struct ib_ucq_object { - struct ib_uobject uobject; - struct ib_uverbs_file *uverbs_file; + struct ib_uevent_object uevent; struct list_head comp_list; - struct list_head async_list; u32 comp_events_reported; - u32 async_events_reported; }; -extern spinlock_t ib_uverbs_idr_lock; -extern struct idr ib_uverbs_pd_idr; -extern struct idr ib_uverbs_mr_idr; -extern struct idr ib_uverbs_mw_idr; -extern struct idr ib_uverbs_ah_idr; -extern struct idr ib_uverbs_cq_idr; -extern struct idr ib_uverbs_qp_idr; -extern struct idr ib_uverbs_srq_idr; -extern struct idr ib_uverbs_xrcd_idr; -extern struct idr ib_uverbs_rule_idr; -extern struct idr ib_uverbs_wq_idr; -extern struct idr ib_uverbs_rwq_ind_tbl_idr; - -void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); - -struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - struct ib_device *ib_dev, - int is_async); -void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file); -struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); - -void ib_uverbs_release_ucq(struct ib_uverbs_file *file, - struct ib_uverbs_event_file *ev_file, +extern const struct file_operations uverbs_event_fops; +extern const struct file_operations uverbs_async_event_fops; +void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue); +void ib_uverbs_init_async_event_file(struct ib_uverbs_async_event_file *ev_file); +void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue); +void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res); + +int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs); +int ib_init_ucontext(struct uverbs_attr_bundle *attrs); + +void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file, struct ib_ucq_object *uobj); -void ib_uverbs_release_uevent(struct ib_uverbs_file *file, - struct ib_uevent_object *uobj); +void ib_uverbs_release_uevent(struct ib_uevent_object *uobj); +void ib_uverbs_release_file(struct kref *ref); void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); -void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event); -void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); +int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs); int uverbs_dealloc_mw(struct ib_mw *mw); +void ib_uverbs_detach_umcast(struct ib_qp *qp, + struct ib_uqp_object *uobj); + +long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); struct ib_uverbs_flow_spec { union { @@ -242,68 +260,45 @@ }; struct ib_uverbs_flow_spec_eth eth; struct ib_uverbs_flow_spec_ipv4 ipv4; + struct ib_uverbs_flow_spec_esp esp; struct ib_uverbs_flow_spec_tcp_udp tcp_udp; struct ib_uverbs_flow_spec_ipv6 ipv6; + 
struct ib_uverbs_flow_spec_action_tag flow_tag; + struct ib_uverbs_flow_spec_action_drop drop; + struct ib_uverbs_flow_spec_action_handle action; + struct ib_uverbs_flow_spec_action_count flow_count; }; }; -#define IB_UVERBS_DECLARE_CMD(name) \ - ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ - struct ib_device *ib_dev, \ - const char __user *buf, int in_len, \ - int out_len) - -IB_UVERBS_DECLARE_CMD(get_context); -IB_UVERBS_DECLARE_CMD(query_device); -IB_UVERBS_DECLARE_CMD(query_port); -IB_UVERBS_DECLARE_CMD(alloc_pd); -IB_UVERBS_DECLARE_CMD(dealloc_pd); -IB_UVERBS_DECLARE_CMD(reg_mr); -IB_UVERBS_DECLARE_CMD(rereg_mr); -IB_UVERBS_DECLARE_CMD(dereg_mr); -IB_UVERBS_DECLARE_CMD(alloc_mw); -IB_UVERBS_DECLARE_CMD(dealloc_mw); -IB_UVERBS_DECLARE_CMD(create_comp_channel); -IB_UVERBS_DECLARE_CMD(create_cq); -IB_UVERBS_DECLARE_CMD(resize_cq); -IB_UVERBS_DECLARE_CMD(poll_cq); -IB_UVERBS_DECLARE_CMD(req_notify_cq); -IB_UVERBS_DECLARE_CMD(destroy_cq); -IB_UVERBS_DECLARE_CMD(create_qp); -IB_UVERBS_DECLARE_CMD(open_qp); -IB_UVERBS_DECLARE_CMD(query_qp); -IB_UVERBS_DECLARE_CMD(modify_qp); -IB_UVERBS_DECLARE_CMD(destroy_qp); -IB_UVERBS_DECLARE_CMD(post_send); -IB_UVERBS_DECLARE_CMD(post_recv); -IB_UVERBS_DECLARE_CMD(post_srq_recv); -IB_UVERBS_DECLARE_CMD(create_ah); -IB_UVERBS_DECLARE_CMD(destroy_ah); -IB_UVERBS_DECLARE_CMD(attach_mcast); -IB_UVERBS_DECLARE_CMD(detach_mcast); -IB_UVERBS_DECLARE_CMD(create_srq); -IB_UVERBS_DECLARE_CMD(modify_srq); -IB_UVERBS_DECLARE_CMD(query_srq); -IB_UVERBS_DECLARE_CMD(destroy_srq); -IB_UVERBS_DECLARE_CMD(create_xsrq); -IB_UVERBS_DECLARE_CMD(open_xrcd); -IB_UVERBS_DECLARE_CMD(close_xrcd); - -#define IB_UVERBS_DECLARE_EX_CMD(name) \ - int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \ - struct ib_device *ib_dev, \ - struct ib_udata *ucore, \ - struct ib_udata *uhw) - -IB_UVERBS_DECLARE_EX_CMD(create_flow); -IB_UVERBS_DECLARE_EX_CMD(destroy_flow); -IB_UVERBS_DECLARE_EX_CMD(query_device); -IB_UVERBS_DECLARE_EX_CMD(create_cq); -IB_UVERBS_DECLARE_EX_CMD(create_qp); -IB_UVERBS_DECLARE_EX_CMD(create_wq); -IB_UVERBS_DECLARE_EX_CMD(modify_wq); -IB_UVERBS_DECLARE_EX_CMD(destroy_wq); -IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); -IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); +int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, + const void *kern_spec_mask, + const void *kern_spec_val, + size_t kern_filter_sz, + union ib_flow_spec *ib_spec); + +/* + * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the + * PortInfo CapabilityMask, but was extended with unique bits. + */ +static inline u32 make_port_cap_flags(const struct ib_port_attr *attr) +{ + u32 res; + + /* All IBA CapabilityMask bits are passed through here, except bit 26, + * which is overridden with IP_BASED_GIDS. This is due to a historical + * mistake in the implementation of IP_BASED_GIDS. Otherwise all other + * bits match the IBA definition across all kernel versions. 
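+	 *
+	 * For example, only attr->ip_gids (never the raw CapabilityMask
+	 * bit 26) decides whether IB_UVERBS_PCF_IP_BASED_GIDS is reported
+	 * below.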
+ */ + res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS; + + if (attr->ip_gids) + res |= IB_UVERBS_PCF_IP_BASED_GIDS; + + return res; +} + +void copy_port_attr_to_resp(struct ib_port_attr *attr, + struct ib_uverbs_query_port_resp *resp, + struct ib_device *ib_dev, u8 port_num); #endif /* UVERBS_H */ diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -70,7 +70,7 @@ ah->last_send = 0; kref_init(&ah->ref); - ah->ah = ib_create_ah(pd, attr); + ah->ah = ib_create_ah(pd, attr, RDMA_CREATE_AH_SLEEPABLE); if (IS_ERR(ah->ah)) { kfree(ah); ah = NULL; @@ -572,7 +572,7 @@ list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) if ((int) priv->tx_tail - (int) ah->last_send >= 0) { list_del(&ah->list); - ib_destroy_ah(ah->ah); + ib_destroy_ah(ah->ah, 0); kfree(ah); } diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c --- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c +++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c @@ -750,12 +750,8 @@ } if (ssk->rx_ring.cq) { - if (ib_destroy_cq(ssk->rx_ring.cq)) { - sdp_warn(ssk->socket, "destroy cq(%p) failed\n", - ssk->rx_ring.cq); - } else { - ssk->rx_ring.cq = NULL; - } + ib_destroy_cq(ssk->rx_ring.cq); + ssk->rx_ring.cq = NULL; } WARN_ON(ring_head(ssk->rx_ring) != ring_tail(ssk->rx_ring)); diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c --- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c +++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c @@ -476,12 +476,8 @@ } if (ssk->tx_ring.cq) { - if (ib_destroy_cq(ssk->tx_ring.cq)) { - sdp_warn(ssk->socket, "destroy cq(%p) failed\n", - ssk->tx_ring.cq); - } else { - ssk->tx_ring.cq = NULL; - } + ib_destroy_cq(ssk->tx_ring.cq); + ssk->tx_ring.cq = NULL; } WARN_ON(ring_head(ssk->tx_ring) != ring_tail(ssk->tx_ring)); diff --git a/sys/ofed/include/rdma/ib_verbs.h b/sys/ofed/include/rdma/ib_verbs.h --- a/sys/ofed/include/rdma/ib_verbs.h +++ b/sys/ofed/include/rdma/ib_verbs.h @@ -60,18 +60,31 @@ #include #include #include +#include #include +#include +#include +#include +#include #include #include +struct ib_uqp_object; +struct ib_usrq_object; +struct ib_uwq_object; struct ifla_vf_info; struct ifla_vf_stats; struct ib_uverbs_file; +struct uverbs_attr_bundle; + +enum ib_uverbs_advise_mr_advice; extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; +struct ib_ucq_object; + union ib_gid { u8 raw[16]; struct { @@ -231,17 +244,6 @@ IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), }; -enum ib_signature_prot_cap { - IB_PROT_T10DIF_TYPE_1 = 1, - IB_PROT_T10DIF_TYPE_2 = 1 << 1, - IB_PROT_T10DIF_TYPE_3 = 1 << 2, -}; - -enum ib_signature_guard_cap { - IB_GUARD_T10DIF_CRC = 1, - IB_GUARD_T10DIF_CSUM = 1 << 1, -}; - enum ib_atomic_cap { IB_ATOMIC_NONE, IB_ATOMIC_HCA, @@ -266,6 +268,7 @@ uint32_t rc_odp_caps; uint32_t uc_odp_caps; uint32_t ud_odp_caps; + uint32_t xrc_odp_caps; } per_transport_caps; }; @@ -279,6 +282,24 @@ u32 max_rwq_indirection_table_size; }; +enum ib_tm_cap_flags { + /* Support tag matching with rendezvous offload for RC transport */ + IB_TM_CAP_RNDV_RC = 1 << 0, +}; + +struct ib_tm_caps { + /* Max size of RNDV header */ + u32 max_rndv_hdr_size; + /* Max number of entries in tag matching list */ + u32 max_num_tags; + /* From enum ib_tm_cap_flags */ + u32 flags; + /* Max number of outstanding list operations 
*/ + u32 max_ops; + /* Max number of SGE in tag matching entry */ + u32 max_sge; +}; + enum ib_cq_creation_flags { IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0, IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1, @@ -290,6 +311,27 @@ u32 flags; }; +enum ib_cq_attr_mask { + IB_CQ_MODERATE = 1 << 0, +}; + +struct ib_cq_caps { + u16 max_cq_moderation_count; + u16 max_cq_moderation_period; +}; + +struct ib_dm_mr_attr { + u64 length; + u64 offset; + u32 access_flags; +}; + +struct ib_dm_alloc_attr { + u64 length; + u32 alignment; + u32 flags; +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; @@ -327,7 +369,11 @@ int max_map_per_fmr; int max_srq; int max_srq_wr; - int max_srq_sge; + union { + int max_srq_sge; + int max_send_sge; + int max_recv_sge; + }; unsigned int max_fast_reg_page_list_len; u16 max_pkeys; u8 local_ca_ack_delay; @@ -338,6 +384,12 @@ uint64_t hca_core_clock; /* in KHZ */ struct ib_rss_caps rss_caps; u32 max_wq_type_rq; + u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */ + struct ib_tm_caps tm_caps; + struct ib_cq_caps cq_caps; + u64 max_dm_size; + /* Max entries for sgl for optimized performance per READ */ + u32 max_sgl_rd; }; enum ib_mtu { @@ -540,6 +592,8 @@ enum ib_mtu max_mtu; enum ib_mtu active_mtu; int gid_tbl_len; + unsigned int ip_gids:1; + /* This is the value from PortInfo CapabilityMask, defined by IBA */ u32 port_cap_flags; u32 max_msg_sz; u32 bad_pkey_cntr; @@ -716,105 +770,26 @@ * enum ib_mr_type - memory region type * @IB_MR_TYPE_MEM_REG: memory region that is used for * normal registration - * @IB_MR_TYPE_SIGNATURE: memory region that is used for - * signature operations (data-integrity - * capable regions) * @IB_MR_TYPE_SG_GAPS: memory region that is capable to * register any arbitrary sg lists (without * the normal mr constraints - see * ib_map_mr_sg) + * @IB_MR_TYPE_DM: memory region that is used for device + * memory registration + * @IB_MR_TYPE_USER: memory region that is used for the user-space + * application + * @IB_MR_TYPE_DMA: memory region that is used for DMA operations + * without address translations (VA=PA) + * @IB_MR_TYPE_INTEGRITY: memory region that is used for + * data integrity operations */ enum ib_mr_type { IB_MR_TYPE_MEM_REG, - IB_MR_TYPE_SIGNATURE, IB_MR_TYPE_SG_GAPS, -}; - -/** - * Signature types - * IB_SIG_TYPE_NONE: Unprotected. - * IB_SIG_TYPE_T10_DIF: Type T10-DIF - */ -enum ib_signature_type { - IB_SIG_TYPE_NONE, - IB_SIG_TYPE_T10_DIF, -}; - -/** - * Signature T10-DIF block-guard types - * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules. - * IB_T10DIF_CSUM: Corresponds to IP checksum rules. - */ -enum ib_t10_dif_bg_type { - IB_T10DIF_CRC, - IB_T10DIF_CSUM -}; - -/** - * struct ib_t10_dif_domain - Parameters specific for T10-DIF - * domain. - * @bg_type: T10-DIF block guard type (CRC|CSUM) - * @pi_interval: protection information interval. - * @bg: seed of guard computation. - * @app_tag: application tag of guard block - * @ref_tag: initial guard block reference tag. - * @ref_remap: Indicate wethear the reftag increments each block - * @app_escape: Indicate to skip block check if apptag=0xffff - * @ref_escape: Indicate to skip block check if reftag=0xffffffff - * @apptag_check_mask: check bitmask of application tag. 
- */ -struct ib_t10_dif_domain { - enum ib_t10_dif_bg_type bg_type; - u16 pi_interval; - u16 bg; - u16 app_tag; - u32 ref_tag; - bool ref_remap; - bool app_escape; - bool ref_escape; - u16 apptag_check_mask; -}; - -/** - * struct ib_sig_domain - Parameters for signature domain - * @sig_type: specific signauture type - * @sig: union of all signature domain attributes that may - * be used to set domain layout. - */ -struct ib_sig_domain { - enum ib_signature_type sig_type; - union { - struct ib_t10_dif_domain dif; - } sig; -}; - -/** - * struct ib_sig_attrs - Parameters for signature handover operation - * @check_mask: bitmask for signature byte check (8 bytes) - * @mem: memory domain layout desciptor. - * @wire: wire domain layout desciptor. - */ -struct ib_sig_attrs { - u8 check_mask; - struct ib_sig_domain mem; - struct ib_sig_domain wire; -}; - -enum ib_sig_err_type { - IB_SIG_BAD_GUARD, - IB_SIG_BAD_REFTAG, - IB_SIG_BAD_APPTAG, -}; - -/** - * struct ib_sig_err - signature error descriptor - */ -struct ib_sig_err { - enum ib_sig_err_type err_type; - u32 expected; - u32 actual; - u64 sig_err_offset; - u32 key; + IB_MR_TYPE_DM, + IB_MR_TYPE_USER, + IB_MR_TYPE_DMA, + IB_MR_TYPE_INTEGRITY, }; enum ib_mr_status_check { @@ -944,9 +919,16 @@ enum ib_srq_type { IB_SRQT_BASIC, - IB_SRQT_XRC + IB_SRQT_XRC, + IB_SRQT_TM, }; +static inline bool ib_srq_has_cq(enum ib_srq_type srq_type) +{ + return srq_type == IB_SRQT_XRC || + srq_type == IB_SRQT_TM; +} + enum ib_srq_attr_mask { IB_SRQ_MAX_WR = 1 << 0, IB_SRQ_LIMIT = 1 << 1, @@ -964,11 +946,17 @@ struct ib_srq_attr attr; enum ib_srq_type srq_type; - union { - struct { - struct ib_xrcd *xrcd; - struct ib_cq *cq; - } xrc; + struct { + struct ib_cq *cq; + union { + struct { + struct ib_xrcd *xrcd; + } xrc; + + struct { + u32 max_num_tags; + } tag_matching; + }; } ext; }; @@ -1010,6 +998,7 @@ IB_QPT_XRC_INI = 9, IB_QPT_XRC_TGT, IB_QPT_MAX, + IB_QPT_DRIVER = 0xFF, /* Reserve a range for qp types internal to the low level driver. 
* These qp types will not be visible at the IB core layer, so the * IB_QPT_MAX usages should not be affected in the core layer @@ -1036,6 +1025,9 @@ IB_QP_CREATE_SIGNATURE_EN = 1 << 6, IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, IB_QP_CREATE_SCATTER_FCS = 1 << 8, + IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, + IB_QP_CREATE_SOURCE_QPN = 1 << 10, + IB_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, @@ -1063,6 +1055,7 @@ */ u8 port_num; struct ib_rwq_ind_table *rwq_ind_tbl; + u32 source_qpn; }; struct ib_qp_open_attr { @@ -1336,13 +1329,19 @@ }; enum ib_access_flags { - IB_ACCESS_LOCAL_WRITE = 1, - IB_ACCESS_REMOTE_WRITE = (1<<1), - IB_ACCESS_REMOTE_READ = (1<<2), - IB_ACCESS_REMOTE_ATOMIC = (1<<3), - IB_ACCESS_MW_BIND = (1<<4), - IB_ZERO_BASED = (1<<5), - IB_ACCESS_ON_DEMAND = (1<<6), + IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE, + IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE, + IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ, + IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC, + IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND, + IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED, + IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND, + IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB, + IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING, + + IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE, + IB_ACCESS_SUPPORTED = + ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL, }; /* @@ -1378,57 +1377,48 @@ RDMA_REMOVE_ABORT, }; +struct ib_rdmacg_object { +}; + struct ib_ucontext { struct ib_device *device; - struct list_head pd_list; - struct list_head mr_list; - struct list_head mw_list; - struct list_head cq_list; - struct list_head qp_list; - struct list_head srq_list; - struct list_head ah_list; - struct list_head xrcd_list; - struct list_head rule_list; - struct list_head wq_list; - struct list_head rwq_ind_tbl_list; - int closing; + struct ib_uverbs_file *ufile; + /* + * 'closing' can be read by the driver only during a destroy callback, + * it is set when we are closing the file descriptor and indicates + * that mm_sem may be locked. + */ + bool closing; bool cleanup_retryable; - pid_t tgid; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct rb_root umem_tree; + struct ib_rdmacg_object cg_obj; /* - * Protects .umem_rbroot and tree, as well as odp_mrs_count and - * mmu notifiers registration. + * Implementation details of the RDMA core, don't use in drivers: */ - struct rw_semaphore umem_rwsem; - void (*invalidate_range)(struct ib_umem *umem, - unsigned long start, unsigned long end); - - struct mmu_notifier mn; - atomic_t notifier_count; - /* A list of umems that don't have private mmu notifier counters yet. 
*/ - struct list_head no_private_counters; - int odp_mrs_count; -#endif + struct xarray mmap_xa; }; struct ib_uobject { u64 user_handle; /* handle given to us by userspace */ + /* ufile & ucontext owning this object */ + struct ib_uverbs_file *ufile; + /* FIXME, save memory: ufile->context == context */ struct ib_ucontext *context; /* associated user context */ void *object; /* containing object */ struct list_head list; /* link to context's list */ + struct ib_rdmacg_object cg_obj; /* rdmacg object */ int id; /* index into kernel idr */ struct kref ref; - struct rw_semaphore mutex; /* protects .live */ + atomic_t usecnt; /* protects exclusive access */ struct rcu_head rcu; /* kfree_rcu() overhead */ - int live; + + const struct uverbs_api_object *uapi_object; }; struct ib_udata { - const void __user *inbuf; - void __user *outbuf; + const u8 __user *inbuf; + u8 __user *outbuf; size_t inlen; size_t outlen; }; @@ -1473,7 +1463,7 @@ struct ib_cq { struct ib_device *device; - struct ib_uobject *uobject; + struct ib_ucq_object *uobject; ib_comp_handler comp_handler; void (*event_handler)(struct ib_event *, void *); void *cq_context; @@ -1486,18 +1476,20 @@ struct ib_srq { struct ib_device *device; struct ib_pd *pd; - struct ib_uobject *uobject; + struct ib_usrq_object *uobject; void (*event_handler)(struct ib_event *, void *); void *srq_context; enum ib_srq_type srq_type; atomic_t usecnt; - union { - struct { - struct ib_xrcd *xrcd; - struct ib_cq *cq; - u32 srq_num; - } xrc; + struct { + struct ib_cq *cq; + union { + struct { + struct ib_xrcd *xrcd; + u32 srq_num; + } xrc; + }; } ext; }; @@ -1513,7 +1505,7 @@ struct ib_wq { struct ib_device *device; - struct ib_uobject *uobject; + struct ib_uwq_object *uobject; void *wq_context; void (*event_handler)(struct ib_event *, void *); struct ib_pd *pd; @@ -1524,6 +1516,13 @@ atomic_t usecnt; }; +enum ib_wq_flags { + IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0, + IB_WQ_FLAGS_SCATTER_FCS = 1 << 1, + IB_WQ_FLAGS_DELAY_DROP = 1 << 2, + IB_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3, +}; + struct ib_wq_init_attr { void *wq_context; enum ib_wq_type wq_type; @@ -1531,16 +1530,20 @@ u32 max_sge; struct ib_cq *cq; void (*event_handler)(struct ib_event *, void *); + u32 create_flags; /* Use enum ib_wq_flags */ }; enum ib_wq_attr_mask { - IB_WQ_STATE = 1 << 0, - IB_WQ_CUR_STATE = 1 << 1, + IB_WQ_STATE = 1 << 0, + IB_WQ_CUR_STATE = 1 << 1, + IB_WQ_FLAGS = 1 << 2, }; struct ib_wq_attr { enum ib_wq_state wq_state; enum ib_wq_state curr_wq_state; + u32 flags; /* Use enum ib_wq_flags */ + u32 flags_mask; /* Use enum ib_wq_flags */ }; struct ib_rwq_ind_table { @@ -1576,7 +1579,7 @@ atomic_t usecnt; struct list_head open_list; struct ib_qp *real_qp; - struct ib_uobject *uobject; + struct ib_uqp_object *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; u32 qp_num; @@ -1584,6 +1587,15 @@ u32 max_read_sge; enum ib_qp_type qp_type; struct ib_rwq_ind_table *rwq_ind_tbl; + u8 port; +}; + +struct ib_dm { + struct ib_device *device; + u32 length; + u32 flags; + struct ib_uobject *uobject; + atomic_t usecnt; }; struct ib_mr { @@ -1594,11 +1606,15 @@ u64 iova; u64 length; unsigned int page_size; + enum ib_mr_type type; bool need_inval; union { struct ib_uobject *uobject; /* user */ struct list_head qp_entry; /* FR */ }; + + struct ib_dm *dm; + struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */ }; struct ib_mw { @@ -1636,17 +1652,27 @@ /* Supported steering header types */ enum ib_flow_spec_type { /* L2 headers*/ - IB_FLOW_SPEC_ETH = 0x20, - 
IB_FLOW_SPEC_IB = 0x22, + IB_FLOW_SPEC_ETH = 0x20, + IB_FLOW_SPEC_IB = 0x22, /* L3 header*/ - IB_FLOW_SPEC_IPV4 = 0x30, - IB_FLOW_SPEC_IPV6 = 0x31, + IB_FLOW_SPEC_IPV4 = 0x30, + IB_FLOW_SPEC_IPV6 = 0x31, + IB_FLOW_SPEC_ESP = 0x34, /* L4 headers*/ - IB_FLOW_SPEC_TCP = 0x40, - IB_FLOW_SPEC_UDP = 0x41 + IB_FLOW_SPEC_TCP = 0x40, + IB_FLOW_SPEC_UDP = 0x41, + IB_FLOW_SPEC_VXLAN_TUNNEL = 0x50, + IB_FLOW_SPEC_GRE = 0x51, + IB_FLOW_SPEC_MPLS = 0x60, + IB_FLOW_SPEC_INNER = 0x100, + /* Actions */ + IB_FLOW_SPEC_ACTION_TAG = 0x1000, + IB_FLOW_SPEC_ACTION_DROP = 0x1001, + IB_FLOW_SPEC_ACTION_HANDLE = 0x1002, + IB_FLOW_SPEC_ACTION_COUNT = 0x1003, }; #define IB_FLOW_SPEC_LAYER_MASK 0xF0 -#define IB_FLOW_SPEC_SUPPORT_LAYERS 4 +#define IB_FLOW_SPEC_SUPPORT_LAYERS 10 /* Flow steering rule priority is set according to it's domain. * Lower domain value means higher priority. @@ -1751,9 +1777,94 @@ struct ib_flow_tcp_udp_filter mask; }; +struct ib_flow_tunnel_filter { + __be32 tunnel_id; + u8 real_sz[0]; +}; + +/* ib_flow_spec_tunnel describes the Vxlan tunnel + * the tunnel_id from val has the vni value + */ +struct ib_flow_spec_tunnel { + u32 type; + u16 size; + struct ib_flow_tunnel_filter val; + struct ib_flow_tunnel_filter mask; +}; + +struct ib_flow_esp_filter { + __be32 spi; + __be32 seq; + /* Must be last */ + u8 real_sz[0]; +}; + +struct ib_flow_spec_esp { + u32 type; + u16 size; + struct ib_flow_esp_filter val; + struct ib_flow_esp_filter mask; +}; + +struct ib_flow_gre_filter { + __be16 c_ks_res0_ver; + __be16 protocol; + __be32 key; + /* Must be last */ + u8 real_sz[0]; +}; + +struct ib_flow_spec_gre { + u32 type; + u16 size; + struct ib_flow_gre_filter val; + struct ib_flow_gre_filter mask; +}; + +struct ib_flow_mpls_filter { + __be32 tag; + /* Must be last */ + u8 real_sz[0]; +}; + +struct ib_flow_spec_mpls { + u32 type; + u16 size; + struct ib_flow_mpls_filter val; + struct ib_flow_mpls_filter mask; +}; + +struct ib_flow_spec_action_tag { + enum ib_flow_spec_type type; + u16 size; + u32 tag_id; +}; + +struct ib_flow_spec_action_drop { + enum ib_flow_spec_type type; + u16 size; +}; + +struct ib_flow_spec_action_handle { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_action *act; +}; + +enum ib_counters_description { + IB_COUNTER_PACKETS, + IB_COUNTER_BYTES, +}; + +struct ib_flow_spec_action_count { + enum ib_flow_spec_type type; + u16 size; + struct ib_counters *counters; +}; + union ib_flow_spec { struct { - enum ib_flow_spec_type type; + u32 type; u16 size; }; struct ib_flow_spec_eth eth; @@ -1761,6 +1872,14 @@ struct ib_flow_spec_ipv4 ipv4; struct ib_flow_spec_tcp_udp tcp_udp; struct ib_flow_spec_ipv6 ipv6; + struct ib_flow_spec_tunnel tunnel; + struct ib_flow_spec_esp esp; + struct ib_flow_spec_gre gre; + struct ib_flow_spec_mpls mpls; + struct ib_flow_spec_action_tag flow_tag; + struct ib_flow_spec_action_drop drop; + struct ib_flow_spec_action_handle action; + struct ib_flow_spec_action_count flow_count; }; struct ib_flow_attr { @@ -1770,17 +1889,74 @@ u32 flags; u8 num_of_specs; u8 port; - /* Following are the optional layers according to user request - * struct ib_flow_spec_xxx - * struct ib_flow_spec_yyy - */ + union ib_flow_spec flows[0]; }; struct ib_flow { struct ib_qp *qp; + struct ib_device *device; struct ib_uobject *uobject; }; +enum ib_flow_action_type { + IB_FLOW_ACTION_UNSPECIFIED, + IB_FLOW_ACTION_ESP = 1, +}; + +struct ib_flow_action_attrs_esp_keymats { + enum ib_uverbs_flow_action_esp_keymat protocol; + union { + struct ib_uverbs_flow_action_esp_keymat_aes_gcm 
aes_gcm; + } keymat; +}; + +struct ib_flow_action_attrs_esp_replays { + enum ib_uverbs_flow_action_esp_replay protocol; + union { + struct ib_uverbs_flow_action_esp_replay_bmp bmp; + } replay; +}; + +enum ib_flow_action_attrs_esp_flags { + /* All user-space flags at the top: Use enum ib_uverbs_flow_action_esp_flags + * This is done in order to share the same flags between user-space and + * kernel and spare an unnecessary translation. + */ + + /* Kernel flags */ + IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED = 1ULL << 32, + IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS = 1ULL << 33, +}; + +struct ib_flow_spec_list { + struct ib_flow_spec_list *next; + union ib_flow_spec spec; +}; + +struct ib_flow_action_attrs_esp { + struct ib_flow_action_attrs_esp_keymats *keymat; + struct ib_flow_action_attrs_esp_replays *replay; + struct ib_flow_spec_list *encap; + /* Used only if IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED is enabled. + * Value of 0 is a valid value. + */ + u32 esn; + u32 spi; + u32 seq; + u32 tfc_pad; + /* Use enum ib_flow_action_attrs_esp_flags */ + u64 flags; + u64 hard_limit_pkts; +}; + +struct ib_flow_action { + struct ib_device *device; + struct ib_uobject *uobject; + enum ib_flow_action_type type; + atomic_t usecnt; +}; + + struct ib_mad_hdr; struct ib_grh; @@ -1863,8 +2039,71 @@ u32 max_mad_size; }; +struct ib_counters { + struct ib_device *device; + struct ib_uobject *uobject; + /* num of objects attached */ + atomic_t usecnt; +}; + +struct ib_counters_read_attr { + u64 *counters_buff; + u32 ncounters; + u32 flags; /* use enum ib_read_counters_flags */ +}; + +#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \ + .size_##ib_struct = \ + (sizeof(struct drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) + \ + BUILD_BUG_ON_ZERO( \ + !__same_type(((struct drv_struct *)NULL)->member, \ + struct ib_struct))) + +#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \ + ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp)) + +#define rdma_zalloc_drv_obj(ib_dev, ib_type) \ + rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL) + +#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct + +struct rdma_user_mmap_entry { + struct kref ref; + struct ib_ucontext *ucontext; + unsigned long start_pgoff; + size_t npages; + bool driver_removed; +}; + +/* Return the offset (in bytes) the user should pass to libc's mmap() */ +static inline u64 +rdma_user_mmap_get_offset(const struct rdma_user_mmap_entry *entry) +{ + return (u64)entry->start_pgoff << PAGE_SHIFT; +} + +struct ib_device_ops { + enum rdma_driver_id driver_id; + DECLARE_RDMA_OBJ_SIZE(ib_ah); + DECLARE_RDMA_OBJ_SIZE(ib_cq); + DECLARE_RDMA_OBJ_SIZE(ib_pd); + DECLARE_RDMA_OBJ_SIZE(ib_srq); + DECLARE_RDMA_OBJ_SIZE(ib_ucontext); +}; + +#define INIT_IB_DEVICE_OPS(pop, driver, DRIVER) do { \ + (pop)[0] .driver_id = RDMA_DRIVER_##DRIVER; \ + (pop)[0] INIT_RDMA_OBJ_SIZE(ib_ah, driver##_ib_ah, ibah); \ + (pop)[0] INIT_RDMA_OBJ_SIZE(ib_cq, driver##_ib_cq, ibcq); \ + (pop)[0] INIT_RDMA_OBJ_SIZE(ib_pd, driver##_ib_pd, ibpd); \ + (pop)[0] INIT_RDMA_OBJ_SIZE(ib_srq, driver##_ib_srq, ibsrq); \ + (pop)[0] INIT_RDMA_OBJ_SIZE(ib_ucontext, driver##_ib_ucontext, ibucontext); \ +} while (0) + struct ib_device { struct device *dma_device; + struct ib_device_ops ops; char name[IB_DEVICE_NAME_MAX]; @@ -1967,24 +2206,22 @@ int (*modify_port)(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify); - struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device, + int (*alloc_ucontext)(struct 
ib_ucontext *uctx, struct ib_udata *udata); - int (*dealloc_ucontext)(struct ib_ucontext *context); + void (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); - struct ib_pd * (*alloc_pd)(struct ib_device *device, - struct ib_ucontext *context, + int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); - int (*dealloc_pd)(struct ib_pd *pd); - struct ib_ah * (*create_ah)(struct ib_pd *pd, - struct ib_ah_attr *ah_attr, - struct ib_udata *udata); + void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); + int (*create_ah)(struct ib_ah *ah, struct ib_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct ib_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct ib_ah_attr *ah_attr); - int (*destroy_ah)(struct ib_ah *ah); - struct ib_srq * (*create_srq)(struct ib_pd *pd, + void (*destroy_ah)(struct ib_ah *ah, u32 flags); + int (*create_srq)(struct ib_srq *srq, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); int (*modify_srq)(struct ib_srq *srq, @@ -1993,7 +2230,7 @@ struct ib_udata *udata); int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); - int (*destroy_srq)(struct ib_srq *srq); + void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata); int (*post_srq_recv)(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); @@ -2008,20 +2245,19 @@ struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); - int (*destroy_qp)(struct ib_qp *qp); + int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata); int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr, const struct ib_send_wr **bad_send_wr); int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); - struct ib_cq * (*create_cq)(struct ib_device *device, + int (*create_cq)(struct ib_cq *, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); - int (*destroy_cq)(struct ib_cq *cq); + void (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); int (*poll_cq)(struct ib_cq *cq, int num_entries, @@ -2045,10 +2281,13 @@ int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); - int (*dereg_mr)(struct ib_mr *mr); - struct ib_mr * (*alloc_mr)(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); + int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); + struct ib_mr * (*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); + int (*advise_mr)(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, u32 flags, + const struct ib_sge *sg_list, u32 num_sge, + struct uverbs_attr_bundle *attrs); int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, @@ -2082,16 +2321,31 @@ size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device, - struct ib_ucontext *ucontext, struct ib_udata *udata); - int (*dealloc_xrcd)(struct ib_xrcd *xrcd); + int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); struct ib_flow * (*create_flow)(struct ib_qp *qp, struct ib_flow_attr *flow_attr, - int domain); + int domain, struct ib_udata *udata); int (*destroy_flow)(struct ib_flow *flow_id); + struct ib_flow_action *(*create_flow_action_esp)( + struct ib_device *device, + const struct 
ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs); + int (*destroy_flow_action)(struct ib_flow_action *action); + int (*modify_flow_action_esp)( + struct ib_flow_action *action, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs); int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); + /** + * This will be called once refcount of an entry in mmap_xa reaches + * zero. The type of the memory that was mapped may differ between + * entries and is opaque to the rdma_user_mmap interface. + * Therefore needs to be implemented by the driver in mmap_free. + */ + void (*mmap_free)(struct rdma_user_mmap_entry *entry); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); void (*drain_rq)(struct ib_qp *qp); void (*drain_sq)(struct ib_qp *qp); @@ -2106,7 +2360,7 @@ struct ib_wq * (*create_wq)(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); - int (*destroy_wq)(struct ib_wq *wq); + void (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata); int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask, @@ -2115,6 +2369,20 @@ struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); + struct ib_dm *(*alloc_dm)(struct ib_device *device, + struct ib_ucontext *context, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs); + int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs); + struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, + struct ib_dm_mr_attr *attr, + struct uverbs_attr_bundle *attrs); + struct ib_counters *(*create_counters)( + struct ib_device *device, struct uverbs_attr_bundle *attrs); + int (*destroy_counters)(struct ib_counters *counters); + int (*read_counters)(struct ib_counters *counters, + struct ib_counters_read_attr *counters_read_attr, + struct uverbs_attr_bundle *attrs); struct ib_dma_mapping_ops *dma_ops; struct module *owner; @@ -2142,6 +2410,8 @@ struct attribute_group *hw_stats_ag; struct rdma_hw_stats *hw_stats; + const struct uapi_definition *driver_def; + /** * The following mandatory functions are used only at device * registration. Keep functions such as these at the end of this @@ -2199,6 +2469,26 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data); +int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, + unsigned long pfn, unsigned long size, pgprot_t prot, + struct rdma_user_mmap_entry *entry); +int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length); +int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length, u32 min_pgoff, + u32 max_pgoff); + +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, + unsigned long pgoff); +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, + struct vm_area_struct *vma); +void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry); + +void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry); static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) { return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; @@ -2209,11 +2499,9 @@ return copy_to_user(udata->outbuf, src, len) ? 
-EFAULT : 0; }
-static inline bool ib_is_udata_cleared(struct ib_udata *udata,
-				       size_t offset,
-				       size_t len)
+static inline bool ib_is_buffer_cleared(const void __user *p,
+					size_t len)
 {
-	const void __user *p = (const char __user *)udata->inbuf + offset;
 	bool ret;
 	u8 *buf;
@@ -2229,6 +2517,13 @@
 	return ret;
 }
+static inline bool ib_is_udata_cleared(struct ib_udata *udata,
+				       size_t offset,
+				       size_t len)
+{
+	return ib_is_buffer_cleared(udata->inbuf + offset, len);
+}
+
 /**
  * ib_is_destroy_retryable - Check whether the uobject destruction
  * is retryable.
@@ -2658,17 +2953,57 @@
 				const char *caller);
 #define ib_alloc_pd(device, flags) \
 	__ib_alloc_pd((device), (flags), __func__)
-void ib_dealloc_pd(struct ib_pd *pd);
+
+/**
+ * ib_dealloc_pd_user - Deallocate kernel/user PD
+ * @pd: The protection domain
+ * @udata: Valid user data or NULL for kernel objects
+ */
+void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
+
+/**
+ * ib_dealloc_pd - Deallocate kernel PD
+ * @pd: The protection domain
+ *
+ * NOTE: for user PD use ib_dealloc_pd_user with valid udata!
+ */
+static inline void ib_dealloc_pd(struct ib_pd *pd)
+{
+	ib_dealloc_pd_user(pd, NULL);
+}
+
+enum rdma_create_ah_flags {
+	/* In a sleepable context */
+	RDMA_CREATE_AH_SLEEPABLE = BIT(0),
+};
 /**
  * ib_create_ah - Creates an address handle for the given address vector.
  * @pd: The protection domain associated with the address handle.
  * @ah_attr: The attributes of the address vector.
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags).
+ *
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
+struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+			   u32 flags);
+
+/**
+ * ib_create_user_ah - Creates an address handle for the given address vector.
+ * It resolves the destination MAC address for RoCE-type ah attributes.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ * @udata: pointer to user's input output buffer information needed by
+ *         the provider driver.
 *
+ * On success it returns a new address handle, otherwise an ERR_PTR.
 * The address handle is used to reference a local or global destination
 * in all UD QP post sends.
 */
-struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *ib_create_user_ah(struct ib_pd *pd,
+				struct ib_ah_attr *ah_attr,
+				struct ib_udata *udata);
 /**
  * ib_init_ah_from_wc - Initializes address handle attributes from a
@@ -2718,11 +3053,30 @@
 */
 int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+enum rdma_destroy_ah_flags {
+	/* In a sleepable context */
+	RDMA_DESTROY_AH_SLEEPABLE = BIT(0),
+};
+
 /**
- * ib_destroy_ah - Destroys an address handle.
+ * ib_destroy_ah_user - Destroys an address handle.
 * @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ * @udata: Valid user data or NULL for kernel objects
 */
-int ib_destroy_ah(struct ib_ah *ah);
+int ib_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
+
+/**
+ * ib_destroy_ah - Destroys a kernel address handle.
+ * @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ *
+ * NOTE: for user ah use ib_destroy_ah_user with valid udata!
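+ *
+ * Minimal kernel-side sketch (pd and attr assumed to already exist):
+ *
+ *	ah = ib_create_ah(pd, &attr, RDMA_CREATE_AH_SLEEPABLE);
+ *	if (!IS_ERR(ah))
+ *		ib_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);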
+ */
+static inline int ib_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+	return ib_destroy_ah_user(ah, flags, NULL);
+}
 /**
 * ib_create_srq - Creates a SRQ associated with the specified protection
@@ -2766,10 +3120,22 @@
 				struct ib_srq_attr *srq_attr);
 /**
- * ib_destroy_srq - Destroys the specified SRQ.
+ * ib_destroy_srq_user - Destroys the specified SRQ.
 * @srq: The SRQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
 */
-int ib_destroy_srq(struct ib_srq *srq);
+int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_srq - Destroys the specified kernel SRQ.
+ * @srq: The SRQ to destroy.
+ *
+ * NOTE: for user srq use ib_destroy_srq_user with valid udata!
+ */
+static inline int ib_destroy_srq(struct ib_srq *srq)
+{
+	return ib_destroy_srq_user(srq, NULL);
+}
 /**
 * ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
@@ -2796,6 +3162,22 @@
 struct ib_qp *ib_create_qp(struct ib_pd *pd,
 			   struct ib_qp_init_attr *qp_init_attr);
+/**
+ * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
+ * @qp: The QP to modify.
+ * @attr: On input, specifies the QP attributes to modify. On output,
+ *   the current values of selected QP attributes are returned.
+ * @attr_mask: A bit-mask used to specify which attributes of the QP
+ *   are being modified.
+ * @udata: pointer to user's input output buffer information needed by
+ *   the provider driver.
+ * It returns 0 on success and returns appropriate error code on error.
+ */
+int ib_modify_qp_with_udata(struct ib_qp *qp,
+			    struct ib_qp_attr *attr,
+			    int attr_mask,
+			    struct ib_udata *udata);
+
 /**
 * ib_modify_qp - Modifies the attributes for the specified QP and then
 * transitions the QP to the given state.
@@ -2828,8 +3210,20 @@
 /**
- * ib_destroy_qp - Destroys the specified QP.
+ * ib_destroy_qp_user - Destroys the specified QP.
 * @qp: The QP to destroy.
+ * @udata: Valid udata or NULL for kernel objects
+ */
+int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata);
+
+/**
+ * ib_destroy_qp - Destroys the specified kernel QP.
+ * @qp: The QP to destroy.
+ *
+ * NOTE: for user qp use ib_destroy_qp_user with valid udata!
 */
-int ib_destroy_qp(struct ib_qp *qp);
+static inline int ib_destroy_qp(struct ib_qp *qp)
+{
+	return ib_destroy_qp_user(qp, NULL);
+}
 /**
 * ib_open_qp - Obtain a reference to an existing sharable QP.
@@ -2885,9 +3279,65 @@ return qp->device->post_recv(qp, recv_wr, bad_recv_wr); } -struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, - int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx); -void ib_free_cq(struct ib_cq *cq); +struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, + enum ib_poll_context poll_ctx, + const char *caller, struct ib_udata *udata); + +/** + * ib_alloc_cq_user: Allocate kernel/user CQ + * @dev: The IB device + * @private: Private data attached to the CQE + * @nr_cqe: Number of CQEs in the CQ + * @comp_vector: Completion vector used for the IRQs + * @poll_ctx: Context used for polling the CQ + * @udata: Valid user data or NULL for kernel objects + */ +static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev, + void *private, int nr_cqe, + int comp_vector, + enum ib_poll_context poll_ctx, + struct ib_udata *udata) +{ + return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, + "ibcore", udata); +} + +/** + * ib_alloc_cq: Allocate kernel CQ + * @dev: The IB device + * @private: Private data attached to the CQE + * @nr_cqe: Number of CQEs in the CQ + * @comp_vector: Completion vector used for the IRQs + * @poll_ctx: Context used for polling the CQ + * + * NOTE: for user cq use ib_alloc_cq_user with valid udata! + */ +static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, + enum ib_poll_context poll_ctx) +{ + return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, + NULL); +} + +/** + * ib_free_cq_user - Free kernel/user CQ + * @cq: The CQ to free + * @udata: Valid user data or NULL for kernel objects + */ +void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata); + +/** + * ib_free_cq - Free kernel CQ + * @cq: The CQ to free + * + * NOTE: for user cq use ib_free_cq_user with valid udata! + */ +static inline void ib_free_cq(struct ib_cq *cq) +{ + ib_free_cq_user(cq, NULL); +} /** * ib_create_cq - Creates a CQ on the specified device. @@ -2902,11 +3352,14 @@ * * Users can examine the cq structure to determine the actual CQ size. */ -struct ib_cq *ib_create_cq(struct ib_device *device, - ib_comp_handler comp_handler, - void (*event_handler)(struct ib_event *, void *), - void *cq_context, - const struct ib_cq_init_attr *cq_attr); +struct ib_cq *__ib_create_cq(struct ib_device *device, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, + const struct ib_cq_init_attr *cq_attr, + const char *caller); +#define ib_create_cq(device, cmp_hndlr, evt_hndlr, cq_ctxt, cq_attr) \ + __ib_create_cq((device), (cmp_hndlr), (evt_hndlr), (cq_ctxt), (cq_attr), "ibcore") /** * ib_resize_cq - Modifies the capacity of the CQ. @@ -2927,10 +3380,22 @@ int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); /** - * ib_destroy_cq - Destroys the specified CQ. + * ib_destroy_cq_user - Destroys the specified CQ. * @cq: The CQ to destroy. + * @udata: Valid user data or NULL for kernel objects */ -int ib_destroy_cq(struct ib_cq *cq); +int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata); + +/** + * ib_destroy_cq - Destroys the specified kernel CQ. + * @cq: The CQ to destroy. + * + * NOTE: for user cq use ib_destroy_cq_user with valid udata! 
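+ *
+ * A kernel CQ created with ib_create_cq() is torn down with this call;
+ * a CQ from ib_alloc_cq() should be released with ib_free_cq() instead.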
+ */ +static inline void ib_destroy_cq(struct ib_cq *cq) +{ + ib_destroy_cq_user(cq, NULL); +} /** * ib_poll_cq - poll a CQ for completion(s) @@ -3278,11 +3743,34 @@ * * This function can fail, if the memory region has memory windows bound to it. */ -int ib_dereg_mr(struct ib_mr *mr); +int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata); -struct ib_mr *ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +/** + * ib_dereg_mr - Deregisters a kernel memory region and removes it from the + * HCA translation table. + * @mr: The memory region to deregister. + * + * This function can fail, if the memory region has memory windows bound to it. + * + * NOTE: for user mr use ib_dereg_mr_user with valid udata! + */ +static inline int ib_dereg_mr(struct ib_mr *mr) +{ + return ib_dereg_mr_user(mr, NULL); +} + +struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); + +static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, u32 max_num_sg) +{ + return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL); +} + +struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_data_sg, + u32 max_num_meta_sg); /** * ib_update_fast_reg_key - updates the key portion of the fast_reg MR @@ -3371,18 +3859,18 @@ /** * ib_alloc_xrcd - Allocates an XRC domain. * @device: The device on which to allocate the XRC domain. + * @caller: Module name for kernel consumers */ -struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device); +struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller); +#define ib_alloc_xrcd(device) \ + __ib_alloc_xrcd((device), "ibcore") /** * ib_dealloc_xrcd - Deallocates an XRC domain. * @xrcd: The XRC domain to deallocate. + * @udata: Valid user data or NULL for kernel object */ -int ib_dealloc_xrcd(struct ib_xrcd *xrcd); - -struct ib_flow *ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, int domain); -int ib_destroy_flow(struct ib_flow *flow_id); +int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); static inline int ib_check_mr_access(int flags) { @@ -3394,9 +3882,26 @@ !(flags & IB_ACCESS_LOCAL_WRITE)) return -EINVAL; + if (flags & ~IB_ACCESS_SUPPORTED) + return -EINVAL; + return 0; } +static inline bool ib_access_writable(int access_flags) +{ + /* + * We have writable memory backing the MR if any of the following + * access flags are set. "Local write" and "remote write" obviously + * require write access. "Remote atomic" can do things like fetch and + * add, which will modify memory, and "MW bind" can change permissions + * by binding a window. + */ + return access_flags & + (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND); +} + /** * ib_check_mr_status: lightweight check of MR status. 
 * This routine may provide status checks on a selected
@@ -3417,7 +3922,7 @@
 					    const struct sockaddr *addr);
 struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr);
-int ib_destroy_wq(struct ib_wq *wq);
+int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
 int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask);
 struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
@@ -3449,6 +3954,8 @@
 struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
+
 int ib_resolve_eth_dmac(struct ib_device *device, struct ib_ah_attr *ah_attr);
 #endif /* IB_VERBS_H */
diff --git a/sys/ofed/include/rdma/signature.h b/sys/ofed/include/rdma/signature.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/rdma/signature.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_SIGNATURE_H_
+#define _RDMA_SIGNATURE_H_
+
+#include
+
+enum ib_signature_prot_cap {
+	IB_PROT_T10DIF_TYPE_1 = 1,
+	IB_PROT_T10DIF_TYPE_2 = 1 << 1,
+	IB_PROT_T10DIF_TYPE_3 = 1 << 2,
+};
+
+enum ib_signature_guard_cap {
+	IB_GUARD_T10DIF_CRC = 1,
+	IB_GUARD_T10DIF_CSUM = 1 << 1,
+};
+
+/**
+ * enum ib_signature_type - Signature types
+ * @IB_SIG_TYPE_NONE: Unprotected.
+ * @IB_SIG_TYPE_T10_DIF: Type T10-DIF
+ */
+enum ib_signature_type {
+	IB_SIG_TYPE_NONE,
+	IB_SIG_TYPE_T10_DIF,
+};
+
+/**
+ * enum ib_t10_dif_bg_type - Signature T10-DIF block-guard types
+ * @IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
+ * @IB_T10DIF_CSUM: Corresponds to IP checksum rules.
+ */
+enum ib_t10_dif_bg_type {
+	IB_T10DIF_CRC,
+	IB_T10DIF_CSUM,
+};
+
+/**
+ * struct ib_t10_dif_domain - Parameters specific for T10-DIF
+ *     domain.
+ * @bg_type: T10-DIF block guard type (CRC|CSUM)
+ * @pi_interval: protection information interval.
+ * @bg: seed of guard computation.
+ * @app_tag: application tag of guard block
+ * @ref_tag: initial guard block reference tag.
+ * @ref_remap: Indicate whether the reftag increments each block
+ * @app_escape: Indicate to skip block check if apptag=0xffff
+ * @ref_escape: Indicate to skip block check if reftag=0xffffffff
+ * @apptag_check_mask: check bitmask of application tag.
+ */
+struct ib_t10_dif_domain {
+	enum ib_t10_dif_bg_type bg_type;
+	u16 pi_interval;
+	u16 bg;
+	u16 app_tag;
+	u32 ref_tag;
+	bool ref_remap;
+	bool app_escape;
+	bool ref_escape;
+	u16 apptag_check_mask;
+};
+
+/**
+ * struct ib_sig_domain - Parameters for signature domain
+ * @sig_type: specific signature type
+ * @sig: union of all signature domain attributes that may
+ *     be used to set domain layout.
+ */
+struct ib_sig_domain {
+	enum ib_signature_type sig_type;
+	union {
+		struct ib_t10_dif_domain dif;
+	} sig;
+};
+
+/**
+ * struct ib_sig_attrs - Parameters for signature handover operation
+ * @check_mask: bitmask for signature byte check (8 bytes)
+ * @mem: memory domain layout descriptor.
+ * @wire: wire domain layout descriptor.
+ * @meta_length: metadata length + */ +struct ib_sig_attrs { + u8 check_mask; + struct ib_sig_domain mem; + struct ib_sig_domain wire; + int meta_length; +}; + +enum ib_sig_err_type { + IB_SIG_BAD_GUARD, + IB_SIG_BAD_REFTAG, + IB_SIG_BAD_APPTAG, +}; + +/* + * Signature check masks (8 bytes in total) according to the T10-PI standard: + * -------- -------- ------------ + * | GUARD | APPTAG | REFTAG | + * | 2B | 2B | 4B | + * -------- -------- ------------ + */ +enum { + IB_SIG_CHECK_GUARD = 0xc0, + IB_SIG_CHECK_APPTAG = 0x30, + IB_SIG_CHECK_REFTAG = 0x0f, +}; + +/* + * struct ib_sig_err - signature error descriptor + */ +struct ib_sig_err { + enum ib_sig_err_type err_type; + u32 expected; + u32 actual; + u64 sig_err_offset; + u32 key; +}; + +#endif /* _RDMA_SIGNATURE_H_ */ diff --git a/sys/ofed/include/rdma/uverbs_ioctl.h b/sys/ofed/include/rdma/uverbs_ioctl.h new file mode 100644 --- /dev/null +++ b/sys/ofed/include/rdma/uverbs_ioctl.h @@ -0,0 +1,958 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UVERBS_IOCTL_ +#define _UVERBS_IOCTL_ + +#include +#include +#include +#include +#include + +/* + * ======================================= + * Verbs action specifications + * ======================================= + */ + +enum uverbs_attr_type { + UVERBS_ATTR_TYPE_NA, + UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_TYPE_PTR_OUT, + UVERBS_ATTR_TYPE_IDR, + UVERBS_ATTR_TYPE_FD, + UVERBS_ATTR_TYPE_ENUM_IN, + UVERBS_ATTR_TYPE_IDRS_ARRAY, +}; + +enum uverbs_obj_access { + UVERBS_ACCESS_READ, + UVERBS_ACCESS_WRITE, + UVERBS_ACCESS_NEW, + UVERBS_ACCESS_DESTROY +}; + +/* Specification of a single attribute inside the ioctl message */ +/* good size 16 */ +struct uverbs_attr_spec { + u8 type; + + /* + * Support extending attributes by length. Allow the user to provide + * more bytes than ptr.len, but check that everything after is zero'd + * by the user. + */ + u8 zero_trailing:1; + /* + * Valid only for PTR_IN. 
Allocate and copy the data inside + * the parser + */ + u8 alloc_and_copy:1; + u8 mandatory:1; + /* True if this is from UVERBS_ATTR_UHW */ + u8 is_udata:1; + + union { + struct { + /* Current known size to kernel */ + u16 len; + /* User isn't allowed to provide something < min_len */ + u16 min_len; + } ptr; + + struct { + /* + * higher bits mean the namespace and lower bits mean + * the type id within the namespace. + */ + u16 obj_type; + u8 access; + } obj; + + struct { + u8 num_elems; + } enum_def; + } u; + + /* This weird split lets us remove some padding */ + union { + struct { + /* + * The enum attribute can select one of the attributes + * contained in the ids array. Currently only PTR_IN + * attributes are supported in the ids array. + */ + const struct uverbs_attr_spec *ids; + } enum_def; + + struct { + /* + * higher bits mean the namespace and lower bits mean + * the type id within the namespace. + */ + u16 obj_type; + u16 min_len; + u16 max_len; + u8 access; + } objs_arr; + } u2; +}; + +/* + * Information about the API is loaded into a radix tree. For IOCTL we start + * with a tuple of: + * object_id, attr_id, method_id + * + * This is a 48 bit value, with most of the bits guaranteed to be zero. Based + * on the current kernel support this is compressed into a 16 bit key for the + * radix tree. Since this compression is entirely internal to the kernel the + * below limits can be revised if the kernel gains additional data. + * + * With 64 leaves per node this is a 3 level radix tree. + * + * The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns + * the object slot, and OBJ_ID,METH_ID,0 returns the method slot. + * + * This also encodes the tables for the write() and extended write() commands + * using the coding + * OBJ_ID,UVERBS_API_METHOD_IS_WRITE,command # + * OBJ_ID,UVERBS_API_METHOD_IS_WRITE_EX,command_ex # + * ie the WRITE path is treated as a special method type in the ioctl + * framework.
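+ */

As a concrete illustration of the encoding described above, this is how a lookup key could be composed from the ids in rdma/ib_user_ioctl_cmds.h (added later in this patch). The snippet is a sketch for the reader, not part of the header:

	/* Slot of the CQ_CREATE method on the core CQ object... */
	u32 method_key = uapi_key_obj(UVERBS_OBJECT_CQ) |
			 uapi_key_ioctl_method(UVERBS_METHOD_CQ_CREATE);
	/* ...and of one attribute within that method. */
	u32 attr_key = method_key |
		       uapi_key_attr(UVERBS_ATTR_CREATE_CQ_HANDLE);
	/* uapi_key_attr_to_ioctl_method(attr_key) recovers method_key. */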
+enum uapi_radix_data { + UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT, + + UVERBS_API_ATTR_KEY_BITS = 6, + UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0), + UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1, + UVERBS_API_WRITE_KEY_NUM = 1 << UVERBS_API_ATTR_KEY_BITS, + + UVERBS_API_METHOD_KEY_BITS = 5, + UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS, + UVERBS_API_METHOD_KEY_NUM_CORE = 22, + UVERBS_API_METHOD_IS_WRITE = 30 << UVERBS_API_METHOD_KEY_SHIFT, + UVERBS_API_METHOD_IS_WRITE_EX = 31 << UVERBS_API_METHOD_KEY_SHIFT, + UVERBS_API_METHOD_KEY_NUM_DRIVER = + (UVERBS_API_METHOD_IS_WRITE >> UVERBS_API_METHOD_KEY_SHIFT) - + UVERBS_API_METHOD_KEY_NUM_CORE, + UVERBS_API_METHOD_KEY_MASK = GENMASK( + UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1, + UVERBS_API_METHOD_KEY_SHIFT), + + UVERBS_API_OBJ_KEY_BITS = 5, + UVERBS_API_OBJ_KEY_SHIFT = + UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT, + UVERBS_API_OBJ_KEY_NUM_CORE = 20, + UVERBS_API_OBJ_KEY_NUM_DRIVER = + (1 << UVERBS_API_OBJ_KEY_BITS) - UVERBS_API_OBJ_KEY_NUM_CORE, + UVERBS_API_OBJ_KEY_MASK = GENMASK(31, UVERBS_API_OBJ_KEY_SHIFT), + + /* This id is guaranteed not to exist in the radix tree */ + UVERBS_API_KEY_ERR = 0xFFFFFFFF, +}; + +static inline __attribute_const__ u32 uapi_key_obj(u32 id) +{ + if (id & UVERBS_API_NS_FLAG) { + id &= ~UVERBS_API_NS_FLAG; + if (id >= UVERBS_API_OBJ_KEY_NUM_DRIVER) + return UVERBS_API_KEY_ERR; + id = id + UVERBS_API_OBJ_KEY_NUM_CORE; + } else { + if (id >= UVERBS_API_OBJ_KEY_NUM_CORE) + return UVERBS_API_KEY_ERR; + } + + return id << UVERBS_API_OBJ_KEY_SHIFT; +} + +static inline __attribute_const__ bool uapi_key_is_object(u32 key) +{ + return (key & ~UVERBS_API_OBJ_KEY_MASK) == 0; +} + +static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id) +{ + if (id & UVERBS_API_NS_FLAG) { + id &= ~UVERBS_API_NS_FLAG; + if (id >= UVERBS_API_METHOD_KEY_NUM_DRIVER) + return UVERBS_API_KEY_ERR; + id = id + UVERBS_API_METHOD_KEY_NUM_CORE; + } else { + id++; + if (id >= UVERBS_API_METHOD_KEY_NUM_CORE) + return UVERBS_API_KEY_ERR; + } + + return id << UVERBS_API_METHOD_KEY_SHIFT; +} + +static inline __attribute_const__ u32 uapi_key_write_method(u32 id) +{ + if (id >= UVERBS_API_WRITE_KEY_NUM) + return UVERBS_API_KEY_ERR; + return UVERBS_API_METHOD_IS_WRITE | id; +} + +static inline __attribute_const__ u32 uapi_key_write_ex_method(u32 id) +{ + if (id >= UVERBS_API_WRITE_KEY_NUM) + return UVERBS_API_KEY_ERR; + return UVERBS_API_METHOD_IS_WRITE_EX | id; +} + +static inline __attribute_const__ u32 +uapi_key_attr_to_ioctl_method(u32 attr_key) +{ + return attr_key & + (UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK); +} + +static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key) +{ + unsigned int method = key & UVERBS_API_METHOD_KEY_MASK; + + return method != 0 && method < UVERBS_API_METHOD_IS_WRITE && + (key & UVERBS_API_ATTR_KEY_MASK) == 0; +} + +static inline __attribute_const__ bool uapi_key_is_write_method(u32 key) +{ + return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE; +} + +static inline __attribute_const__ bool uapi_key_is_write_ex_method(u32 key) +{ + return (key & UVERBS_API_METHOD_KEY_MASK) == + UVERBS_API_METHOD_IS_WRITE_EX; +} + +static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key) +{ + /* 0 is the method slot itself */ + return ioctl_method_key + 1; +} + +static inline __attribute_const__ u32 uapi_key_attr(u32 id) +{ + /* + * The attr is designed to fit in the
typical single radix tree node + * of 64 entries. Since almost all methods have driver attributes we + * organize things so that the driver and core attributes interleave to + * reduce the length of the attributes array in typical cases. + */ + if (id & UVERBS_API_NS_FLAG) { + id &= ~UVERBS_API_NS_FLAG; + id++; + if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1)) + return UVERBS_API_KEY_ERR; + id = (id << 1) | 0; + } else { + if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1)) + return UVERBS_API_KEY_ERR; + id = (id << 1) | 1; + } + + return id; +} + +/* Only true for ioctl methods */ +static inline __attribute_const__ bool uapi_key_is_attr(u32 key) +{ + unsigned int method = key & UVERBS_API_METHOD_KEY_MASK; + + return method != 0 && method < UVERBS_API_METHOD_IS_WRITE && + (key & UVERBS_API_ATTR_KEY_MASK) != 0; +} + +/* + * This returns a value in the range [0 to UVERBS_API_ATTR_BKEY_LEN), + * basically it undoes the reservation of 0 in the ID numbering. attr_key + * must already be masked with UVERBS_API_ATTR_KEY_MASK, or be the output of + * uapi_key_attr(). + */ +static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key) +{ + return attr_key - 1; +} + +static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey) +{ + return attr_bkey + 1; +} + +/* + * ======================================= + * Verbs definitions + * ======================================= + */ + +struct uverbs_attr_def { + u16 id; + struct uverbs_attr_spec attr; +}; + +struct uverbs_method_def { + u16 id; + /* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */ + u32 flags; + size_t num_attrs; + const struct uverbs_attr_def * const (*attrs)[]; + int (*handler)(struct uverbs_attr_bundle *attrs); +}; + +struct uverbs_object_def { + u16 id; + const struct uverbs_obj_type *type_attrs; + size_t num_methods; + const struct uverbs_method_def * const (*methods)[]; +}; + +enum uapi_definition_kind { + UAPI_DEF_END = 0, + UAPI_DEF_OBJECT_START, + UAPI_DEF_WRITE, + UAPI_DEF_CHAIN_OBJ_TREE, + UAPI_DEF_CHAIN, + UAPI_DEF_IS_SUPPORTED_FUNC, + UAPI_DEF_IS_SUPPORTED_DEV_FN, +}; + +enum uapi_definition_scope { + UAPI_SCOPE_OBJECT = 1, + UAPI_SCOPE_METHOD = 2, +}; + +struct uapi_definition { + u8 kind; + u8 scope; + union { + struct { + u16 object_id; + } object_start; + struct { + u16 command_num; + u8 is_ex:1; + u8 has_udata:1; + u8 has_resp:1; + u8 req_size; + u8 resp_size; + } write; + }; + + union { + bool (*func_is_supported)(struct ib_device *device); + int (*func_write)(struct uverbs_attr_bundle *attrs); + const struct uapi_definition *chain; + const struct uverbs_object_def *chain_obj_tree; + size_t needs_fn_offset; + }; +}; + +/* Define things connected to object_id */ +#define DECLARE_UVERBS_OBJECT(_object_id, ...) \ + { \ + .kind = UAPI_DEF_OBJECT_START, \ + .object_start = { .object_id = _object_id }, \ + }, \ + ##__VA_ARGS__ + +/* Use in a var_args of DECLARE_UVERBS_OBJECT */ +#define DECLARE_UVERBS_WRITE(_command_num, _func, _cmd_desc, ...) \ + { \ + .kind = UAPI_DEF_WRITE, \ + .scope = UAPI_SCOPE_OBJECT, \ + .write = { .is_ex = 0, .command_num = _command_num }, \ + .func_write = _func, \ + _cmd_desc, \ + }, \ + ##__VA_ARGS__ + +/* Use in a var_args of DECLARE_UVERBS_OBJECT */ +#define DECLARE_UVERBS_WRITE_EX(_command_num, _func, _cmd_desc, ...)
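\ + { \ + .kind = UAPI_DEF_WRITE, \ + .scope = UAPI_SCOPE_OBJECT, \ + .write = { .is_ex = 1, .command_num = _command_num }, \ + .func_write = _func, \ + _cmd_desc, \ + }, \ + ##__VA_ARGS__

A usage sketch for the two macros above, modeled on the Linux uverbs command tables. ib_uverbs_create_cq and the create_cq device method are assumptions here and are not defined by this header:

	/* Hypothetical definition table for the CQ object's write command. */
	static const struct uapi_definition example_cq_defs[] = {
		DECLARE_UVERBS_OBJECT(
			UVERBS_OBJECT_CQ,
			DECLARE_UVERBS_WRITE(
				IB_USER_VERBS_CMD_CREATE_CQ,
				ib_uverbs_create_cq,	/* assumed handler */
				.write.has_udata = 1,
				UAPI_DEF_METHOD_NEEDS_FN(create_cq))),
		{},	/* UAPI_DEF_END terminator */
	};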
+ +/* + * Object is only supported if the function pointer named ibdev_fn in struct + * ib_device is not NULL. + */ +#define UAPI_DEF_OBJ_NEEDS_FN(ibdev_fn) \ + { \ + .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \ + .scope = UAPI_SCOPE_OBJECT, \ + .needs_fn_offset = \ + offsetof(struct ib_device, ibdev_fn) + \ + BUILD_BUG_ON_ZERO( \ + sizeof(((struct ib_device *)0)->ibdev_fn) != \ + sizeof(void *)), \ + } + +/* + * Method is only supported if the function pointer named ibdev_fn in struct + * ib_device is not NULL. + */ +#define UAPI_DEF_METHOD_NEEDS_FN(ibdev_fn) \ + { \ + .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \ + .scope = UAPI_SCOPE_METHOD, \ + .needs_fn_offset = \ + offsetof(struct ib_device, ibdev_fn) + \ + BUILD_BUG_ON_ZERO( \ + sizeof(((struct ib_device *)0)->ibdev_fn) != \ + sizeof(void *)), \ + } + +/* Call a function to determine if the entire object is supported or not */ +#define UAPI_DEF_IS_OBJ_SUPPORTED(_func) \ + { \ + .kind = UAPI_DEF_IS_SUPPORTED_FUNC, \ + .scope = UAPI_SCOPE_OBJECT, .func_is_supported = _func, \ + } + +/* Include another struct uapi_definition in this one */ +#define UAPI_DEF_CHAIN(_def_var) \ + { \ + .kind = UAPI_DEF_CHAIN, .chain = _def_var, \ + } + +/* Temporary until the tree base description is replaced */ +#define UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, _object_ptr, ...) \ + { \ + .kind = UAPI_DEF_CHAIN_OBJ_TREE, \ + .object_start = { .object_id = _object_enum }, \ + .chain_obj_tree = _object_ptr, \ + }, \ + ##__VA_ARGS__ +#define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...) \ + UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, &UVERBS_OBJECT(_object_enum), \ + ##__VA_ARGS__) + +/* + * ======================================= + * Attribute Specifications + * ======================================= + */ + +#define UVERBS_ATTR_SIZE(_min_len, _len) \ + .u.ptr.min_len = _min_len, .u.ptr.len = _len + +#define UVERBS_ATTR_NO_DATA() UVERBS_ATTR_SIZE(0, 0) + +/* + * Specifies a uapi structure that cannot be extended. The user must always + * supply the whole structure and nothing more. The structure must be declared + * in a header under include/uapi/rdma. + */ +#define UVERBS_ATTR_TYPE(_type) \ + .u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type) +/* + * Specifies a uapi structure where the user must provide at least up to + * member 'last'. Anything after last and up until the end of the structure + * can be non-zero, anything longer than the end of the structure must be + * zero. The structure must be declared in a header under include/uapi/rdma. + */ +#define UVERBS_ATTR_STRUCT(_type, _last) \ + .zero_trailing = 1, \ + UVERBS_ATTR_SIZE(((uintptr_t)(&((_type *)0)->_last + 1)), \ + sizeof(_type)) +/* + * Specifies at least min_len bytes must be passed in, but the amount can be + * larger, up to the protocol maximum size. No check for zeroing is done. + */ +#define UVERBS_ATTR_MIN_SIZE(_min_len) UVERBS_ATTR_SIZE(_min_len, USHRT_MAX) + +/* Must be used in the '...' of any UVERBS_ATTR */ +#define UA_ALLOC_AND_COPY .alloc_and_copy = 1 +#define UA_MANDATORY .mandatory = 1 +#define UA_OPTIONAL .mandatory = 0 + +/* + * _min_len must be bigger than 0 and _max_len must be smaller than 4095. Only + * READ/WRITE accesses are supported. + */ +#define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \ + ...)
\ + (&(const struct uverbs_attr_def){ \ + .id = (_attr_id) + \ + BUILD_BUG_ON_ZERO((_min_len) == 0 || \ + (_max_len) > \ + PAGE_SIZE / sizeof(void *) || \ + (_min_len) > (_max_len) || \ + (_access) == UVERBS_ACCESS_NEW || \ + (_access) == UVERBS_ACCESS_DESTROY), \ + .attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY, \ + .u2.objs_arr.obj_type = _idr_type, \ + .u2.objs_arr.access = _access, \ + .u2.objs_arr.min_len = _min_len, \ + .u2.objs_arr.max_len = _max_len, \ + __VA_ARGS__ } }) + +/* + * Only for use with UVERBS_ATTR_IDR, allows any uobject type to be accepted; + * the user must validate the type of the uobject instead. + */ +#define UVERBS_IDR_ANY_OBJECT 0xFFFF + +#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \ + (&(const struct uverbs_attr_def){ \ + .id = _attr_id, \ + .attr = { .type = UVERBS_ATTR_TYPE_IDR, \ + .u.obj.obj_type = _idr_type, \ + .u.obj.access = _access, \ + __VA_ARGS__ } }) + +#define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...) \ + (&(const struct uverbs_attr_def){ \ + .id = (_attr_id) + \ + BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW && \ + (_access) != UVERBS_ACCESS_READ), \ + .attr = { .type = UVERBS_ATTR_TYPE_FD, \ + .u.obj.obj_type = _fd_type, \ + .u.obj.access = _access, \ + __VA_ARGS__ } }) + +#define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) \ + (&(const struct uverbs_attr_def){ \ + .id = _attr_id, \ + .attr = { .type = UVERBS_ATTR_TYPE_PTR_IN, \ + _type, \ + __VA_ARGS__ } }) + +#define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...) \ + (&(const struct uverbs_attr_def){ \ + .id = _attr_id, \ + .attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT, \ + _type, \ + __VA_ARGS__ } }) + +/* _enum_arr should be a 'static const struct uverbs_attr_spec[]' */ +#define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...) \ + (&(const struct uverbs_attr_def){ \ + .id = _attr_id, \ + .attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN, \ + .u2.enum_def.ids = _enum_arr, \ + .u.enum_def.num_elems = ARRAY_SIZE(_enum_arr), \ + __VA_ARGS__ }, \ + }) + +/* An input value that is a member in the enum _enum_type. */ +#define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...) \ + UVERBS_ATTR_PTR_IN( \ + _attr_id, \ + UVERBS_ATTR_SIZE( \ + sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)), \ + sizeof(u64)), \ + __VA_ARGS__) + +/* + * An input value that is a bitwise combination of values of _enum_type. + * This permits the flag value to be passed as either a u32 or u64; it must + * be retrieved via uverbs_get_flags32()/uverbs_get_flags64(). + */ +#define UVERBS_ATTR_FLAGS_IN(_attr_id, _enum_type, ...) \ + UVERBS_ATTR_PTR_IN( \ + _attr_id, \ + UVERBS_ATTR_SIZE(sizeof(u32) + BUILD_BUG_ON_ZERO( \ + !sizeof(_enum_type *)), \ + sizeof(u64)), \ + __VA_ARGS__) + +/* + * This spec is used in order to pass information to the hardware driver in a + * legacy way. Every verb that could get driver specific data should get this + * spec. + */ +#define UVERBS_ATTR_UHW() \ + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \ + UVERBS_ATTR_MIN_SIZE(0), \ + UA_OPTIONAL, \ + .is_udata = 1), \ + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \ + UVERBS_ATTR_MIN_SIZE(0), \ + UA_OPTIONAL, \ + .is_udata = 1) + +/* ================================================= + * Parsing infrastructure + * ================================================= + */ + + +struct uverbs_ptr_attr { + /* + * If UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY is set then the 'ptr' is + * used.
+ */ + union { + void *ptr; + u64 data; + }; + u16 len; + u16 uattr_idx; + u8 enum_id; +}; + +struct uverbs_obj_attr { + struct ib_uobject *uobject; + const struct uverbs_api_attr *attr_elm; +}; + +struct uverbs_objs_arr_attr { + struct ib_uobject **uobjects; + u16 len; +}; + +struct uverbs_attr { + union { + struct uverbs_ptr_attr ptr_attr; + struct uverbs_obj_attr obj_attr; + struct uverbs_objs_arr_attr objs_arr_attr; + }; +}; + +struct uverbs_attr_bundle { + struct ib_udata driver_udata; + struct ib_udata ucore; + struct ib_uverbs_file *ufile; + struct ib_ucontext *context; + DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN); + struct uverbs_attr attrs[0]; +}; + +static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_bundle, + unsigned int idx) +{ + return test_bit(uapi_bkey_attr(uapi_key_attr(idx)), + attrs_bundle->attr_present); +} + +/** + * rdma_udata_to_drv_context - Helper macro to get the driver's context out of + * ib_udata which is embedded in uverbs_attr_bundle. + * + * If udata is not NULL this cannot fail. Otherwise a NULL udata will result + * in a NULL ucontext pointer, as a safety precaution. Callers should be using + * 'udata' to determine if the driver call is in user or kernel mode, not + * 'ucontext'. + * + */ +#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ + (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ + driver_udata) \ + ->context, \ + drv_dev_struct, member) : \ + (drv_dev_struct *)NULL) + +#define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) + +static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle, + u16 idx) +{ + if (!uverbs_attr_is_valid(attrs_bundle, idx)) + return ERR_PTR(-ENOENT); + + return &attrs_bundle->attrs[uapi_bkey_attr(uapi_key_attr(idx))]; +} + +static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs_bundle, + u16 idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + return attr->ptr_attr.enum_id; +} + +static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle, + u16 idx) +{ + const struct uverbs_attr *attr; + + attr = uverbs_attr_get(attrs_bundle, idx); + if (IS_ERR(attr)) + return ERR_CAST(attr); + + return attr->obj_attr.uobject->object; +} + +static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_attr_bundle *attrs_bundle, + u16 idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + + if (IS_ERR(attr)) + return ERR_CAST(attr); + + return attr->obj_attr.uobject; +} + +static inline int +uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + return attr->ptr_attr.len; +} + +/* + * uverbs_attr_ptr_get_array_size() - Get the number of elements in a ptr + * attribute's array. + * @attrs: The attribute bundle + * @idx: The ID of the attribute + * @elem_size: The size of the element in the array + */ +static inline int +uverbs_attr_ptr_get_array_size(struct uverbs_attr_bundle *attrs, u16 idx, + size_t elem_size) +{ + int size = uverbs_attr_get_len(attrs, idx); + + if (size < 0) + return size; + + if (size % elem_size) + return -EINVAL; + + return size / elem_size; +} + +/** + * uverbs_attr_get_uobjs_arr() - Provides the array's properties for a + * UVERBS_ATTR_TYPE_IDRS_ARRAY attribute.
+ * @arr: Returned pointer to array of pointers for uobjects or NULL if + * the attribute isn't provided. + * + * Return: The array length or 0 if no attribute was provided. + */ +static inline int uverbs_attr_get_uobjs_arr( + const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx, + struct ib_uobject ***arr) +{ + const struct uverbs_attr *attr = + uverbs_attr_get(attrs_bundle, attr_idx); + + if (IS_ERR(attr)) { + *arr = NULL; + return 0; + } + + *arr = attr->objs_arr_attr.uobjects; + + return attr->objs_arr_attr.len; +} + +static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr) +{ + return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data); +} + +static inline void *uverbs_attr_get_alloced_ptr( + const struct uverbs_attr_bundle *attrs_bundle, u16 idx) +{ + struct uverbs_attr *attr = __DECONST(struct uverbs_attr *, uverbs_attr_get(attrs_bundle, idx)); + + if (IS_ERR(attr)) + return (void *)attr; + + return uverbs_attr_ptr_is_inline(attr) ? + (void *)&attr->ptr_attr.data : attr->ptr_attr.ptr; +} + +static inline int _uverbs_copy_from(void *to, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, + size_t size) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + /* + * Validation ensures attr->ptr_attr.len >= size. If the caller is + * using UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO then it must call + * uverbs_copy_from_or_zero. + */ + if (unlikely(size < attr->ptr_attr.len)) + return -EINVAL; + + if (uverbs_attr_ptr_is_inline(attr)) + memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len); + else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data), + attr->ptr_attr.len)) + return -EFAULT; + + return 0; +} + +static inline int _uverbs_copy_from_or_zero(void *to, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, + size_t size) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + size_t min_size; + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + min_size = min_t(size_t, size, attr->ptr_attr.len); + + if (uverbs_attr_ptr_is_inline(attr)) + memcpy(to, &attr->ptr_attr.data, min_size); + else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data), + min_size)) + return -EFAULT; + + if (size > min_size) + memset((char *)to + min_size, 0, size - min_size); + + return 0; +} + +#define uverbs_copy_from(to, attrs_bundle, idx) \ + _uverbs_copy_from(to, attrs_bundle, idx, sizeof(*to)) + +#define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \ + _uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to)) + +static inline struct ib_ucontext * +ib_uverbs_get_ucontext(const struct uverbs_attr_bundle *attrs) +{ + return ib_uverbs_get_ucontext_file(attrs->ufile); +} + +#if 1 /* IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) */ +int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits); +int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits); +int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx, + const void *from, size_t size); +__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size, + gfp_t flags); + +static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle, + size_t size) +{ + return _uverbs_alloc(bundle, size, GFP_KERNEL); +} + +static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, + size_t size) +{ + return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); +} +int 
_uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, s64 lower_bound, u64 upper_bound, + s64 *def_val); +int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, + size_t idx, const void *from, size_t size); +#else +static inline int +uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + return -EINVAL; +} +static inline int +uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 allowed_bits) +{ + return -EINVAL; +} +static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, const void *from, size_t size) +{ + return -EINVAL; +} +static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle, + size_t size) +{ + return ERR_PTR(-EINVAL); +} +static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, + size_t size) +{ + return ERR_PTR(-EINVAL); +} +static inline int +_uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, s64 lower_bound, u64 upper_bound, + s64 *def_val) +{ + return -EINVAL; +} +static inline int +uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, + size_t idx, const void *from, size_t size) +{ + return -EINVAL; +} +#endif + +#define uverbs_get_const(_to, _attrs_bundle, _idx) \ + ({ \ + s64 _val; \ + int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx, \ + type_min(typeof(*_to)), \ + type_max(typeof(*_to)), NULL); \ + (*_to) = _val; \ + _ret; \ + }) + +#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \ + ({ \ + s64 _val; \ + s64 _def_val = _default; \ + int _ret = \ + _uverbs_get_const(&_val, _attrs_bundle, _idx, \ + type_min(typeof(*_to)), \ + type_max(typeof(*_to)), &_def_val); \ + (*_to) = _val; \ + _ret; \ + }) +#endif diff --git a/sys/ofed/include/rdma/uverbs_named_ioctl.h b/sys/ofed/include/rdma/uverbs_named_ioctl.h new file mode 100644 --- /dev/null +++ b/sys/ofed/include/rdma/uverbs_named_ioctl.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _UVERBS_NAMED_IOCTL_ +#define _UVERBS_NAMED_IOCTL_ + +#include + +#ifndef UVERBS_MODULE_NAME +#error "Please #define UVERBS_MODULE_NAME before including rdma/uverbs_named_ioctl.h" +#endif + +#define _UVERBS_PASTE(x, y) x ## y +#define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y) +#define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id) +#define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id) +#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id) + +/* These are static so they do not need to be qualified */ +#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id +#define UVERBS_OBJECT_METHODS(object_id) _object_methods_##object_id + +#define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...) \ + static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ + _method_id)[] = { __VA_ARGS__ }; \ + static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ + .id = _method_id, \ + .handler = UVERBS_HANDLER(_method_id), \ + .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ + .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ + } + +/* Create a standard destroy method using the default handler. The handle_attr + * argument must be the attribute specifying the handle to destroy, the + * default handler does not support any other attributes. + */ +#define DECLARE_UVERBS_NAMED_METHOD_DESTROY(_method_id, _handle_attr) \ + static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ + _method_id)[] = { _handle_attr }; \ + static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ + .id = _method_id, \ + .handler = uverbs_destroy_def_handler, \ + .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ + .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ + } + +#define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...) \ + static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ + _object_id)[] = { __VA_ARGS__ }; \ + static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ + .id = _object_id, \ + .type_attrs = &_type_attrs, \ + .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ + .methods = &UVERBS_OBJECT_METHODS(_object_id) \ + } + +/* + * Declare global methods. These still have a unique object_id because we + * identify all uapi methods with a (object,method) tuple. However, they have + * no type pointer. + */ +#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...) \ + static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ + _object_id)[] = { __VA_ARGS__ }; \ + static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ + .id = _object_id, \ + .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ + .methods = &UVERBS_OBJECT_METHODS(_object_id) \ + } + +/* Used by drivers to declare a complete parsing tree for new methods + */ +#define ADD_UVERBS_METHODS(_name, _object_id, ...) \ + static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ + _object_id)[] = { __VA_ARGS__ }; \ + static const struct uverbs_object_def _name = { \ + .id = _object_id, \ + .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ + .methods = &UVERBS_OBJECT_METHODS(_object_id) \ + }; + +/* Used by drivers to declare a complete parsing tree for a single method that + * differs only in having additional driver specific attributes. + */ +#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...) 
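\ + static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ + _method_id)[] = { __VA_ARGS__ }; \ + static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ + .id = _method_id, \ + .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ + .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ + }; \ + ADD_UVERBS_METHODS(_name, _object_id, &UVERBS_METHOD(_method_id))

A typical instantiation of these macros, following the way the Linux tree declares the standard CQ-destroy method (the ids come from rdma/ib_user_ioctl_cmds.h below; treat this as an illustrative sketch, not part of the patch):

	DECLARE_UVERBS_NAMED_METHOD_DESTROY(
		UVERBS_METHOD_CQ_DESTROY,
		UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE,
				UVERBS_OBJECT_CQ,
				UVERBS_ACCESS_DESTROY,
				UA_MANDATORY));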
+ +#endif diff --git a/sys/ofed/include/rdma/uverbs_std_types.h b/sys/ofed/include/rdma/uverbs_std_types.h new file mode 100644 --- /dev/null +++ b/sys/ofed/include/rdma/uverbs_std_types.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UVERBS_STD_TYPES__ +#define _UVERBS_STD_TYPES__ + +#include +#include +#include + +/* Returns _id, or causes a compile error if _id is not a u32. + * + * The uobj APIs should only be used with the write-based uAPI to access + * object IDs. The write API must use a u32 for the object handle, which is + * checked by this macro.
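+ */

The macros below are easiest to read next to a caller. A sketch of a write-path handler, modeled on the uverbs_cmd.c style; the handler name and the decoded "cmd" argument are hypothetical:

	static int example_poll_handler(struct uverbs_attr_bundle *attrs,
					struct ib_uverbs_poll_cq *cmd)
	{
		struct ib_cq *cq;

		/* Translate the u32 handle into a read-locked object. */
		cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle,
				       attrs);
		if (!cq)
			return -EINVAL;

		/* ... poll the CQ ... */

		uobj_put_obj_read(cq);
		return 0;
	}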
+#define _uobj_check_id(_id) ({ CTASSERT(sizeof(_id) == sizeof(u32)); (_id); }) + +#define uobj_get_type(_attrs, _object) \ + uapi_get_object((_attrs)->ufile->device->uapi, _object) + +#define uobj_get_read(_type, _id, _attrs) \ + rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ + _uobj_check_id(_id), UVERBS_LOOKUP_READ, \ + _attrs) + +#define ufd_get_read(_type, _fdnum, _attrs) ({ \ + CTASSERT(sizeof(_fdnum) == sizeof(s32)); \ + rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ + (_fdnum), \ + UVERBS_LOOKUP_READ, _attrs); \ +}) + +static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) +{ + if (IS_ERR(uobj)) + return NULL; + return uobj->object; +} +#define uobj_get_obj_read(_object, _type, _id, _attrs) \ + ((struct ib_##_object *)_uobj_get_obj_read( \ + uobj_get_read(_type, _id, _attrs))) + +#define uobj_get_write(_type, _id, _attrs) \ + rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ + _uobj_check_id(_id), UVERBS_LOOKUP_WRITE, \ + _attrs) + +int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, + struct uverbs_attr_bundle *attrs); +#define uobj_perform_destroy(_type, _id, _attrs) \ + __uobj_perform_destroy(uobj_get_type(_attrs, _type), \ + _uobj_check_id(_id), _attrs) + +struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, + u32 id, struct uverbs_attr_bundle *attrs); + +#define uobj_get_destroy(_type, _id, _attrs) \ + __uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id), \ + _attrs) + +static inline void uobj_put_destroy(struct ib_uobject *uobj) +{ + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); +} + +static inline void uobj_put_read(struct ib_uobject *uobj) +{ + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ); +} + +#define uobj_put_obj_read(_obj) \ + uobj_put_read((_obj)->uobject) + +static inline void uobj_put_write(struct ib_uobject *uobj) +{ + rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); +} + +static inline void uobj_alloc_abort(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs) +{ + rdma_alloc_abort_uobject(uobj, attrs); +} + +static inline struct ib_uobject * +__uobj_alloc(const struct uverbs_api_object *obj, + struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev) +{ + struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs); + + if (!IS_ERR(uobj)) + *ib_dev = attrs->context->device; + return uobj; +} + +#define uobj_alloc(_type, _attrs, _ib_dev) \ + __uobj_alloc(uobj_get_type(_attrs, _type), _attrs, _ib_dev) + +static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action, + struct ib_uobject *uobj, + struct ib_device *ib_dev, + enum ib_flow_action_type type) +{ + atomic_set(&action->usecnt, 0); + action->device = ib_dev; + action->type = type; + action->uobject = uobj; + uobj->object = action; +} + +struct ib_uflow_resources { + size_t max; + size_t num; + size_t collection_num; + size_t counters_num; + struct ib_counters **counters; + struct ib_flow_action **collection; +}; + +struct ib_uflow_object { + struct ib_uobject uobject; + struct ib_uflow_resources *resources; +}; + +struct ib_uflow_resources *flow_resources_alloc(size_t num_specs); +void flow_resources_add(struct ib_uflow_resources *uflow_res, + enum ib_flow_spec_type type, + void *ibobj); +void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res); + +static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow, + struct ib_qp *qp, struct ib_device *device, + struct ib_uflow_resources
*uflow_res) +{ + struct ib_uflow_object *uflow; + + uobj->object = ibflow; + ibflow->uobject = uobj; + + if (qp) { + atomic_inc(&qp->usecnt); + ibflow->qp = qp; + } + + ibflow->device = device; + uflow = container_of(uobj, typeof(*uflow), uobject); + uflow->resources = uflow_res; +} + +struct uverbs_api_object { + const struct uverbs_obj_type *type_attrs; + const struct uverbs_obj_type_class *type_class; + u8 disabled:1; + u32 id; +}; + +static inline u32 uobj_get_object_id(struct ib_uobject *uobj) +{ + return uobj->uapi_object->id; +} + +#endif + diff --git a/sys/ofed/include/rdma/uverbs_types.h b/sys/ofed/include/rdma/uverbs_types.h new file mode 100644 --- /dev/null +++ b/sys/ofed/include/rdma/uverbs_types.h @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UVERBS_TYPES_ +#define _UVERBS_TYPES_ + +#include +#include +#include + +struct uverbs_obj_type; +struct uverbs_api_object; +struct uverbs_attr_bundle; +struct ib_uverbs_file; + +enum rdma_remove_reason; + +enum rdma_lookup_mode { + UVERBS_LOOKUP_READ, + UVERBS_LOOKUP_WRITE, + /* + * Destroy is like LOOKUP_WRITE, except that the uobject is not + * locked. uobj_destroy is used to convert a LOOKUP_DESTROY lock into + * a LOOKUP_WRITE lock. + */ + UVERBS_LOOKUP_DESTROY, +}; + +/* + * The following sequences are valid: + * Success flow: + * alloc_begin + * alloc_commit + * [..] + * Access flow: + * lookup_get(exclusive=false) & uverbs_try_lock_object + * lookup_put(exclusive=false) via rdma_lookup_put_uobject + * Destruction flow: + * lookup_get(exclusive=true) & uverbs_try_lock_object + * remove_commit + * remove_handle (optional) + * lookup_put(exclusive=true) via rdma_lookup_put_uobject + * + * Allocate Error flow #1 + * alloc_begin + * alloc_abort + * Allocate Error flow #2 + * alloc_begin + * remove_commit + * alloc_abort + * Allocate Error flow #3 + * alloc_begin + * alloc_commit (fails) + * remove_commit + * alloc_abort + * + * In all cases the caller must hold the ufile kref until alloc_commit or + * alloc_abort returns. 
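+ */

Expressed with the helpers declared later in this header, the access flow above looks roughly like this (a sketch; the surrounding handler and the "obj", "ufile", "id" and "attrs" variables are assumed):

	uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_READ,
				       attrs);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);
	/* ... use uobj->object while holding the read lock ... */
	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);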
+struct uverbs_obj_type_class { + struct ib_uobject *(*alloc_begin)(const struct uverbs_api_object *obj, + struct uverbs_attr_bundle *attrs); + /* This consumes the kref on uobj */ + void (*alloc_commit)(struct ib_uobject *uobj); + /* This does not consume the kref on uobj */ + void (*alloc_abort)(struct ib_uobject *uobj); + + struct ib_uobject *(*lookup_get)(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile, s64 id, + enum rdma_lookup_mode mode); + void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode); + /* This does not consume the kref on uobj */ + int __must_check (*destroy_hw)(struct ib_uobject *uobj, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs); + void (*remove_handle)(struct ib_uobject *uobj); +}; + +struct uverbs_obj_type { + const struct uverbs_obj_type_class * const type_class; + size_t obj_size; +}; + +/* + * Object type classes which support a detach state (object is still alive but + * not attached to any context) need to make sure: + * (a) no call through to a driver after a detach is called + * (b) detach isn't called concurrently with context_cleanup + */ + +struct uverbs_obj_idr_type { + /* + * In idr based objects, uverbs_obj_type_class points to generic + * idr operations. In order to specialize the underlying types (e.g. CQ, + * QPs, etc.), we add destroy_object specific callbacks. + */ + struct uverbs_obj_type type; + + /* Free driver resources from the uobject, make the driver uncallable, + * and move the uobject to the detached state. If the object was + * destroyed by the user's request, a failure should leave the uobject + * completely unchanged. + */ + int __must_check (*destroy_object)(struct ib_uobject *uobj, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs); +}; + +struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, + struct ib_uverbs_file *ufile, s64 id, + enum rdma_lookup_mode mode, + struct uverbs_attr_bundle *attrs); +void rdma_lookup_put_uobject(struct ib_uobject *uobj, + enum rdma_lookup_mode mode); +struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, + struct uverbs_attr_bundle *attrs); +void rdma_alloc_abort_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs); +void rdma_alloc_commit_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs); + +/* + * uverbs_uobject_get is called in order to increase the reference count on + * an uobject. This is useful when a handler wants to keep the uobject's memory + * alive, regardless of whether this uobject is still alive in the context's + * objects repository. Objects are put via uverbs_uobject_put. + */ +static inline void uverbs_uobject_get(struct ib_uobject *uobject) +{ + kref_get(&uobject->ref); +} +void uverbs_uobject_put(struct ib_uobject *uobject); + +struct uverbs_obj_fd_type { + /* + * In fd based objects, uverbs_obj_type_class points to generic + * fd operations. In order to specialize the underlying types (e.g. + * completion_channel), we use fops, name and flags for fd creation. + * destroy_object is called when the uobject is to be destroyed, + * because the driver is removed or the FD is closed.
+ */ + struct uverbs_obj_type type; + int (*destroy_object)(struct ib_uobject *uobj, + enum rdma_remove_reason why); + const struct file_operations *fops; + const char *name; + int flags; +}; + +extern const struct uverbs_obj_type_class uverbs_idr_class; +extern const struct uverbs_obj_type_class uverbs_fd_class; +int uverbs_uobject_fd_release(struct inode *inode, struct file *filp); + +#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ + sizeof(char)) +#define UVERBS_TYPE_ALLOC_FD(_obj_size, _destroy_object, _fops, _name, _flags) \ + ((&((const struct uverbs_obj_fd_type) \ + {.type = { \ + .type_class = &uverbs_fd_class, \ + .obj_size = (_obj_size) + \ + UVERBS_BUILD_BUG_ON((_obj_size) < \ + sizeof(struct ib_uobject)), \ + }, \ + .destroy_object = _destroy_object, \ + .fops = _fops, \ + .name = _name, \ + .flags = _flags}))->type) +#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _destroy_object) \ + ((&((const struct uverbs_obj_idr_type) \ + {.type = { \ + .type_class = &uverbs_idr_class, \ + .obj_size = (_size) + \ + UVERBS_BUILD_BUG_ON((_size) < \ + sizeof(struct ib_uobject)) \ + }, \ + .destroy_object = _destroy_object,}))->type) +#define UVERBS_TYPE_ALLOC_IDR(_destroy_object) \ + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), \ + _destroy_object) + +#endif diff --git a/sys/ofed/include/uapi/rdma/ib_user_ioctl_cmds.h b/sys/ofed/include/uapi/rdma/ib_user_ioctl_cmds.h new file mode 100644 --- /dev/null +++ b/sys/ofed/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef IB_USER_IOCTL_CMDS_H +#define IB_USER_IOCTL_CMDS_H + +#define UVERBS_ID_NS_MASK 0xF000 +#define UVERBS_ID_NS_SHIFT 12 + +#define UVERBS_UDATA_DRIVER_DATA_NS 1 +#define UVERBS_UDATA_DRIVER_DATA_FLAG (1UL << UVERBS_ID_NS_SHIFT) + +enum uverbs_default_objects { + UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ + UVERBS_OBJECT_PD, + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_OBJECT_CQ, + UVERBS_OBJECT_QP, + UVERBS_OBJECT_SRQ, + UVERBS_OBJECT_AH, + UVERBS_OBJECT_MR, + UVERBS_OBJECT_MW, + UVERBS_OBJECT_FLOW, + UVERBS_OBJECT_XRCD, + UVERBS_OBJECT_RWQ_IND_TBL, + UVERBS_OBJECT_WQ, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_OBJECT_DM, + UVERBS_OBJECT_COUNTERS, + UVERBS_OBJECT_ASYNC_EVENT, +}; + +enum { + UVERBS_ATTR_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG, + UVERBS_ATTR_UHW_OUT, +}; + +enum uverbs_methods_device { + UVERBS_METHOD_INVOKE_WRITE, + UVERBS_METHOD_INFO_HANDLES, + UVERBS_METHOD_QUERY_PORT, + UVERBS_METHOD_GET_CONTEXT, +}; + +enum uverbs_attrs_invoke_write_cmd_attr_ids { + UVERBS_ATTR_CORE_IN, + UVERBS_ATTR_CORE_OUT, + UVERBS_ATTR_WRITE_CMD, +}; + +enum uverbs_attrs_query_port_cmd_attr_ids { + UVERBS_ATTR_QUERY_PORT_PORT_NUM, + UVERBS_ATTR_QUERY_PORT_RESP, +}; + +enum uverbs_attrs_get_context_attr_ids { + UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, + UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT, +}; + +enum uverbs_attrs_create_cq_cmd_attr_ids { + UVERBS_ATTR_CREATE_CQ_HANDLE, + UVERBS_ATTR_CREATE_CQ_CQE, + UVERBS_ATTR_CREATE_CQ_USER_HANDLE, + UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, + UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, + UVERBS_ATTR_CREATE_CQ_FLAGS, + UVERBS_ATTR_CREATE_CQ_RESP_CQE, +}; + +enum uverbs_attrs_destroy_cq_cmd_attr_ids { + UVERBS_ATTR_DESTROY_CQ_HANDLE, + UVERBS_ATTR_DESTROY_CQ_RESP, +}; + +enum uverbs_attrs_create_flow_action_esp { + UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, + UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, + UVERBS_ATTR_FLOW_ACTION_ESP_ESN, + UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, + UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, + UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, +}; + +enum uverbs_attrs_modify_flow_action_esp { + UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE = + UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, +}; + +enum uverbs_attrs_destroy_flow_action_esp { + UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, +}; + +enum uverbs_methods_cq { + UVERBS_METHOD_CQ_CREATE, + UVERBS_METHOD_CQ_DESTROY, +}; + +enum uverbs_methods_actions_flow_action_ops { + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + UVERBS_METHOD_FLOW_ACTION_DESTROY, + UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, +}; + +enum uverbs_attrs_alloc_dm_cmd_attr_ids { + UVERBS_ATTR_ALLOC_DM_HANDLE, + UVERBS_ATTR_ALLOC_DM_LENGTH, + UVERBS_ATTR_ALLOC_DM_ALIGNMENT, +}; + +enum uverbs_attrs_free_dm_cmd_attr_ids { + UVERBS_ATTR_FREE_DM_HANDLE, +}; + +enum uverbs_methods_dm { + UVERBS_METHOD_DM_ALLOC, + UVERBS_METHOD_DM_FREE, +}; + +enum uverbs_attrs_reg_dm_mr_cmd_attr_ids { + UVERBS_ATTR_REG_DM_MR_HANDLE, + UVERBS_ATTR_REG_DM_MR_OFFSET, + UVERBS_ATTR_REG_DM_MR_LENGTH, + UVERBS_ATTR_REG_DM_MR_PD_HANDLE, + UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, + UVERBS_ATTR_REG_DM_MR_DM_HANDLE, + UVERBS_ATTR_REG_DM_MR_RESP_LKEY, + UVERBS_ATTR_REG_DM_MR_RESP_RKEY, +}; + +enum uverbs_methods_mr { + UVERBS_METHOD_DM_MR_REG, + UVERBS_METHOD_MR_DESTROY, + UVERBS_METHOD_ADVISE_MR, +}; + +enum uverbs_attrs_mr_destroy_ids { + UVERBS_ATTR_DESTROY_MR_HANDLE, +}; + +enum uverbs_attrs_advise_mr_cmd_attr_ids { + UVERBS_ATTR_ADVISE_MR_PD_HANDLE, + UVERBS_ATTR_ADVISE_MR_ADVICE, + UVERBS_ATTR_ADVISE_MR_FLAGS, + UVERBS_ATTR_ADVISE_MR_SGE_LIST, +}; + +enum uverbs_attrs_create_counters_cmd_attr_ids 
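{ + UVERBS_ATTR_CREATE_COUNTERS_HANDLE, +};

Driver-private ids live in a separate namespace selected by UVERBS_ID_NS_SHIFT, so they can never collide with the core ids in this file. A sketch of the convention (the enum and its members are illustrative; compare the mlx5 ids in the Linux tree):

	enum example_driver_attr_ids {
		EXAMPLE_ATTR_FIRST = (1U << UVERBS_ID_NS_SHIFT),
		EXAMPLE_ATTR_SECOND,
	};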
+ +enum uverbs_attrs_destroy_counters_cmd_attr_ids { + UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, +}; + +enum uverbs_attrs_read_counters_cmd_attr_ids { + UVERBS_ATTR_READ_COUNTERS_HANDLE, + UVERBS_ATTR_READ_COUNTERS_BUFF, + UVERBS_ATTR_READ_COUNTERS_FLAGS, +}; + +enum uverbs_methods_actions_counters_ops { + UVERBS_METHOD_COUNTERS_CREATE, + UVERBS_METHOD_COUNTERS_DESTROY, + UVERBS_METHOD_COUNTERS_READ, +}; + +enum uverbs_attrs_info_handles_id { + UVERBS_ATTR_INFO_OBJECT_ID, + UVERBS_ATTR_INFO_TOTAL_HANDLES, + UVERBS_ATTR_INFO_HANDLES_LIST, +}; + +enum uverbs_methods_pd { + UVERBS_METHOD_PD_DESTROY, +}; + +enum uverbs_attrs_pd_destroy_ids { + UVERBS_ATTR_DESTROY_PD_HANDLE, +}; + +enum uverbs_methods_mw { + UVERBS_METHOD_MW_DESTROY, +}; + +enum uverbs_attrs_mw_destroy_ids { + UVERBS_ATTR_DESTROY_MW_HANDLE, +}; + +enum uverbs_methods_xrcd { + UVERBS_METHOD_XRCD_DESTROY, +}; + +enum uverbs_attrs_xrcd_destroy_ids { + UVERBS_ATTR_DESTROY_XRCD_HANDLE, +}; + +enum uverbs_methods_ah { + UVERBS_METHOD_AH_DESTROY, +}; + +enum uverbs_attrs_ah_destroy_ids { + UVERBS_ATTR_DESTROY_AH_HANDLE, +}; + +enum uverbs_methods_rwq_ind_tbl { + UVERBS_METHOD_RWQ_IND_TBL_DESTROY, +}; + +enum uverbs_attrs_rwq_ind_tbl_destroy_ids { + UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE, +}; + +enum uverbs_methods_flow { + UVERBS_METHOD_FLOW_DESTROY, +}; + +enum uverbs_attrs_flow_destroy_ids { + UVERBS_ATTR_DESTROY_FLOW_HANDLE, +}; + +enum uverbs_method_async_event { + UVERBS_METHOD_ASYNC_EVENT_ALLOC, +}; + +enum uverbs_attrs_async_event_create { + UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE, +}; + +#endif diff --git a/sys/ofed/include/uapi/rdma/ib_user_ioctl_verbs.h b/sys/ofed/include/uapi/rdma/ib_user_ioctl_verbs.h new file mode 100644 --- /dev/null +++ b/sys/ofed/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -0,0 +1,211 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2017-2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#ifndef IB_USER_IOCTL_VERBS_H +#define IB_USER_IOCTL_VERBS_H + +#include +#include + +#ifndef RDMA_UAPI_PTR +#define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name +#endif + +#define IB_UVERBS_ACCESS_OPTIONAL_FIRST (1 << 20) +#define IB_UVERBS_ACCESS_OPTIONAL_LAST (1 << 29) + +enum ib_uverbs_core_support { + IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS = 1 << 0, +}; + +enum ib_uverbs_access_flags { + IB_UVERBS_ACCESS_LOCAL_WRITE = 1 << 0, + IB_UVERBS_ACCESS_REMOTE_WRITE = 1 << 1, + IB_UVERBS_ACCESS_REMOTE_READ = 1 << 2, + IB_UVERBS_ACCESS_REMOTE_ATOMIC = 1 << 3, + IB_UVERBS_ACCESS_MW_BIND = 1 << 4, + IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5, + IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6, + IB_UVERBS_ACCESS_HUGETLB = 1 << 7, + + IB_UVERBS_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_OPTIONAL_FIRST, + IB_UVERBS_ACCESS_OPTIONAL_RANGE = + ((IB_UVERBS_ACCESS_OPTIONAL_LAST << 1) - 1) & + ~(IB_UVERBS_ACCESS_OPTIONAL_FIRST - 1) +}; + +enum ib_uverbs_query_port_cap_flags { + IB_UVERBS_PCF_SM = 1 << 1, + IB_UVERBS_PCF_NOTICE_SUP = 1 << 2, + IB_UVERBS_PCF_TRAP_SUP = 1 << 3, + IB_UVERBS_PCF_OPT_IPD_SUP = 1 << 4, + IB_UVERBS_PCF_AUTO_MIGR_SUP = 1 << 5, + IB_UVERBS_PCF_SL_MAP_SUP = 1 << 6, + IB_UVERBS_PCF_MKEY_NVRAM = 1 << 7, + IB_UVERBS_PCF_PKEY_NVRAM = 1 << 8, + IB_UVERBS_PCF_LED_INFO_SUP = 1 << 9, + IB_UVERBS_PCF_SM_DISABLED = 1 << 10, + IB_UVERBS_PCF_SYS_IMAGE_GUID_SUP = 1 << 11, + IB_UVERBS_PCF_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + IB_UVERBS_PCF_EXTENDED_SPEEDS_SUP = 1 << 14, + IB_UVERBS_PCF_CM_SUP = 1 << 16, + IB_UVERBS_PCF_SNMP_TUNNEL_SUP = 1 << 17, + IB_UVERBS_PCF_REINIT_SUP = 1 << 18, + IB_UVERBS_PCF_DEVICE_MGMT_SUP = 1 << 19, + IB_UVERBS_PCF_VENDOR_CLASS_SUP = 1 << 20, + IB_UVERBS_PCF_DR_NOTICE_SUP = 1 << 21, + IB_UVERBS_PCF_CAP_MASK_NOTICE_SUP = 1 << 22, + IB_UVERBS_PCF_BOOT_MGMT_SUP = 1 << 23, + IB_UVERBS_PCF_LINK_LATENCY_SUP = 1 << 24, + IB_UVERBS_PCF_CLIENT_REG_SUP = 1 << 25, + /* + * IsOtherLocalChangesNoticeSupported is aliased by IP_BASED_GIDS and + * is inaccessible + */ + IB_UVERBS_PCF_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27, + IB_UVERBS_PCF_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28, + IB_UVERBS_PCF_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29, + IB_UVERBS_PCF_MCAST_FDB_TOP_SUP = 1 << 30, + IB_UVERBS_PCF_HIERARCHY_INFO_SUP = 1ULL << 31, + + /* NOTE this is an internal flag, not an IBA flag */ + IB_UVERBS_PCF_IP_BASED_GIDS = 1 << 26, +}; + +enum ib_uverbs_query_port_flags { + IB_UVERBS_QPF_GRH_REQUIRED = 1 << 0, +}; + +enum ib_uverbs_flow_action_esp_keymat { + IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM, +}; + +enum ib_uverbs_flow_action_esp_keymat_aes_gcm_iv_algo { + IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ, +}; + +struct ib_uverbs_flow_action_esp_keymat_aes_gcm { + __aligned_u64 iv; + __u32 iv_algo; /* Use enum ib_uverbs_flow_action_esp_keymat_aes_gcm_iv_algo */ + + __u32 salt; + __u32 icv_len; + + __u32 key_len; + __u32 aes_key[256 / 32]; +}; + +enum ib_uverbs_flow_action_esp_replay { + IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE, + IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP, +}; + +struct ib_uverbs_flow_action_esp_replay_bmp { + __u32 size; +}; + +enum ib_uverbs_flow_action_esp_flags { + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_INLINE_CRYPTO = 0UL << 0, /* Default */ + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_FULL_OFFLOAD = 1UL << 0, + + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_TUNNEL = 0UL << 1, /* Default */ + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_TRANSPORT = 1UL << 1, + + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_DECRYPT = 0UL << 2, /* Default */ + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT = 1UL << 2, + + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW = 1UL 
<< 3, +}; + +struct ib_uverbs_flow_action_esp_encap { + /* This struct represents a list of pointers to flow_xxxx_filter that + * encapsulates the payload in ESP tunnel mode. + */ + RDMA_UAPI_PTR(void *, val_ptr); /* pointer to a flow_xxxx_filter */ + RDMA_UAPI_PTR(struct ib_uverbs_flow_action_esp_encap *, next_ptr); + __u16 len; /* Len of the filter struct val_ptr points to */ + __u16 type; /* Use flow_spec_type enum */ +}; + +struct ib_uverbs_flow_action_esp { + __u32 spi; + __u32 seq; + __u32 tfc_pad; + __u32 flags; + __aligned_u64 hard_limit_pkts; +}; + +enum ib_uverbs_read_counters_flags { + /* prefer read values from driver cache */ + IB_UVERBS_READ_COUNTERS_PREFER_CACHED = 1 << 0, +}; + +enum ib_uverbs_advise_mr_advice { + IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH, + IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE, +}; + +enum ib_uverbs_advise_mr_flag { + IB_UVERBS_ADVISE_MR_FLAG_FLUSH = 1 << 0, +}; + +struct ib_uverbs_query_port_resp_ex { + struct ib_uverbs_query_port_resp legacy_resp; + __u16 port_cap_flags2; + __u8 reserved[6]; +}; + +enum rdma_driver_id { + RDMA_DRIVER_UNKNOWN, + RDMA_DRIVER_MLX5, + RDMA_DRIVER_MLX4, + RDMA_DRIVER_CXGB3, + RDMA_DRIVER_CXGB4, + RDMA_DRIVER_MTHCA, + RDMA_DRIVER_BNXT_RE, + RDMA_DRIVER_OCRDMA, + RDMA_DRIVER_NES, + RDMA_DRIVER_I40IW, + RDMA_DRIVER_VMW_PVRDMA, + RDMA_DRIVER_QEDR, + RDMA_DRIVER_HNS, + RDMA_DRIVER_USNIC, + RDMA_DRIVER_RXE, + RDMA_DRIVER_HFI1, + RDMA_DRIVER_QIB, + RDMA_DRIVER_EFA, + RDMA_DRIVER_SIW, + RDMA_DRIVER_QLNXR, +}; + +#endif diff --git a/sys/ofed/include/uapi/rdma/ib_user_mad.h b/sys/ofed/include/uapi/rdma/ib_user_mad.h --- a/sys/ofed/include/uapi/rdma/ib_user_mad.h +++ b/sys/ofed/include/uapi/rdma/ib_user_mad.h @@ -38,13 +38,7 @@ #ifndef IB_USER_MAD_H #define IB_USER_MAD_H -#ifdef _KERNEL -#include -#include -#else -#include -#include -#endif +#include /* * Increment this value if any changes that break userspace ABI @@ -239,16 +233,4 @@ __u8 reserved[3]; }; -#define IB_IOCTL_MAGIC 0x1b - -#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \ - struct ib_user_mad_reg_req) - -#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) - -#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3) - -#define IB_USER_MAD_REGISTER_AGENT2 _IOWR(IB_IOCTL_MAGIC, 4, \ - struct ib_user_mad_reg_req2) - #endif /* IB_USER_MAD_H */ diff --git a/sys/ofed/include/uapi/rdma/ib_user_verbs.h b/sys/ofed/include/uapi/rdma/ib_user_verbs.h --- a/sys/ofed/include/uapi/rdma/ib_user_verbs.h +++ b/sys/ofed/include/uapi/rdma/ib_user_verbs.h @@ -53,7 +53,7 @@ #define IB_USER_VERBS_ABI_VERSION 6 #define IB_USER_VERBS_CMD_THRESHOLD 50 -enum { +enum ib_uverbs_write_cmds { IB_USER_VERBS_CMD_GET_CONTEXT, IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_CMD_QUERY_PORT, @@ -101,13 +101,15 @@ IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ, IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP, + IB_USER_VERBS_EX_CMD_MODIFY_QP = IB_USER_VERBS_CMD_MODIFY_QP, IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, IB_USER_VERBS_EX_CMD_DESTROY_FLOW, IB_USER_VERBS_EX_CMD_CREATE_WQ, IB_USER_VERBS_EX_CMD_MODIFY_WQ, IB_USER_VERBS_EX_CMD_DESTROY_WQ, IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, - IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL + IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL, + IB_USER_VERBS_EX_CMD_MODIFY_CQ }; /* @@ -122,13 +124,19 @@ */ struct ib_uverbs_async_event_desc { - __u64 element; + __aligned_u64 element; __u32 event_type; /* enum ib_event_type */ __u32 reserved; }; 
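The __u64 to __aligned_u64 conversions in this hunk are ABI-relevant: plain __u64 is only 4-byte aligned on i386, so structures mixing __u64 with 32-bit fields could lay out differently for 32-bit and 64-bit userspace, while __aligned_u64 forces 8-byte alignment in both. A hypothetical compile-time check of the struct above:

	/* 8 (element) + 4 (event_type) + 4 (reserved) = 16 bytes everywhere. */
	_Static_assert(sizeof(struct ib_uverbs_async_event_desc) == 16,
		       "uverbs event ABI changed size");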
 
 struct ib_uverbs_comp_event_desc {
-	__u64 cq_handle;
+	__aligned_u64 cq_handle;
+};
+
+struct ib_uverbs_cq_moderation_caps {
+	__u16 max_cq_moderation_count;
+	__u16 max_cq_moderation_period;
+	__u32 reserved;
 };
 
 /*
@@ -140,10 +148,7 @@
  */
 
 #define IB_USER_VERBS_CMD_COMMAND_MASK	0xff
-#define IB_USER_VERBS_CMD_FLAGS_MASK	0xff000000u
-#define IB_USER_VERBS_CMD_FLAGS_SHIFT	24
-
-#define IB_USER_VERBS_CMD_FLAG_EXTENDED	0x80
+#define IB_USER_VERBS_CMD_FLAG_EXTENDED	0x80000000u
 
 struct ib_uverbs_cmd_hdr {
 	__u32 command;
@@ -152,33 +157,34 @@
 };
 
 struct ib_uverbs_ex_cmd_hdr {
-	__u64 response;
+	__aligned_u64 response;
 	__u16 provider_in_words;
 	__u16 provider_out_words;
 	__u32 cmd_hdr_reserved;
 };
 
 struct ib_uverbs_get_context {
-	__u64 response;
-	__u64 driver_data[0];
+	__aligned_u64 response;
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_get_context_resp {
 	__u32 async_fd;
 	__u32 num_comp_vectors;
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_query_device {
-	__u64 response;
-	__u64 driver_data[0];
+	__aligned_u64 response;
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_query_device_resp {
-	__u64 fw_ver;
+	__aligned_u64 fw_ver;
 	__be64 node_guid;
 	__be64 sys_image_guid;
-	__u64 max_mr_size;
-	__u64 page_size_cap;
+	__aligned_u64 max_mr_size;
+	__aligned_u64 page_size_cap;
 	__u32 vendor_id;
 	__u32 vendor_part_id;
 	__u32 hw_ver;
@@ -223,7 +229,7 @@
 };
 
 struct ib_uverbs_odp_caps {
-	__u64 general_caps;
+	__aligned_u64 general_caps;
 	struct {
 		__u32 rc_odp_caps;
 		__u32 uc_odp_caps;
@@ -243,28 +249,47 @@
 	__u32 reserved;
 };
 
+struct ib_uverbs_tm_caps {
+	/* Max size of rendezvous request message */
+	__u32 max_rndv_hdr_size;
+	/* Max number of entries in tag matching list */
+	__u32 max_num_tags;
+	/* TM flags */
+	__u32 flags;
+	/* Max number of outstanding list operations */
+	__u32 max_ops;
+	/* Max number of SGE in tag matching entry */
+	__u32 max_sge;
+	__u32 reserved;
+};
+
 struct ib_uverbs_ex_query_device_resp {
 	struct ib_uverbs_query_device_resp base;
 	__u32 comp_mask;
 	__u32 response_length;
 	struct ib_uverbs_odp_caps odp_caps;
-	__u64 timestamp_mask;
-	__u64 hca_core_clock; /* in KHZ */
-	__u64 device_cap_flags_ex;
+	__aligned_u64 timestamp_mask;
+	__aligned_u64 hca_core_clock; /* in KHZ */
+	__aligned_u64 device_cap_flags_ex;
 	struct ib_uverbs_rss_caps rss_caps;
 	__u32 max_wq_type_rq;
+	__u32 raw_packet_caps;
+	struct ib_uverbs_tm_caps tm_caps;
+	struct ib_uverbs_cq_moderation_caps cq_moderation_caps;
+	__aligned_u64 max_dm_size;
+	__u32 xrc_odp_caps;
 	__u32 reserved;
 };
 
 struct ib_uverbs_query_port {
-	__u64 response;
+	__aligned_u64 response;
 	__u8 port_num;
 	__u8 reserved[7];
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_query_port_resp {
-	__u32 port_cap_flags;
+	__u32 port_cap_flags;	/* see ib_uverbs_query_port_cap_flags */
 	__u32 max_msg_sz;
 	__u32 bad_pkey_cntr;
 	__u32 qkey_viol_cntr;
@@ -284,16 +309,18 @@
 	__u8 active_speed;
 	__u8 phys_state;
 	__u8 link_layer;
-	__u8 reserved[2];
+	__u8 flags;	/* see ib_uverbs_query_port_flags */
+	__u8 reserved;
 };
 
 struct ib_uverbs_alloc_pd {
-	__u64 response;
-	__u64 driver_data[0];
+	__aligned_u64 response;
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_alloc_pd_resp {
 	__u32 pd_handle;
+	__u32 driver_data[0];
 };
 
 struct ib_uverbs_dealloc_pd {
@@ -301,14 +328,15 @@
 };
 
 struct ib_uverbs_open_xrcd {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 fd;
 	__u32 oflags;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_open_xrcd_resp {
 	__u32 xrcd_handle;
+	__u32 driver_data[0];
 };
 
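Several response structures above gain a zero-length `driver_data[0]` trailing member. It does not change `sizeof()` of the base struct; it only marks where a provider driver may append private reply bytes after the common part. A hedged sketch of how a consumer would size such a reply (resp_size is a hypothetical, provider-documented count):

#include <stddef.h>

/* resp_size is illustrative: the provider-private byte count that the
 * driver's own ABI header documents separately.
 */
static inline size_t xrcd_resp_len(size_t resp_size)
{
	return sizeof(struct ib_uverbs_open_xrcd_resp) + resp_size;
}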
 struct ib_uverbs_close_xrcd {
@@ -316,35 +344,38 @@
 };
 
 struct ib_uverbs_reg_mr {
-	__u64 response;
-	__u64 start;
-	__u64 length;
-	__u64 hca_va;
+	__aligned_u64 response;
+	__aligned_u64 start;
+	__aligned_u64 length;
+	__aligned_u64 hca_va;
 	__u32 pd_handle;
 	__u32 access_flags;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_reg_mr_resp {
 	__u32 mr_handle;
 	__u32 lkey;
 	__u32 rkey;
+	__u32 driver_data[0];
 };
 
 struct ib_uverbs_rereg_mr {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 mr_handle;
 	__u32 flags;
-	__u64 start;
-	__u64 length;
-	__u64 hca_va;
+	__aligned_u64 start;
+	__aligned_u64 length;
+	__aligned_u64 hca_va;
 	__u32 pd_handle;
 	__u32 access_flags;
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_rereg_mr_resp {
 	__u32 lkey;
 	__u32 rkey;
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_dereg_mr {
@@ -352,15 +383,17 @@
 };
 
 struct ib_uverbs_alloc_mw {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 pd_handle;
 	__u8 mw_type;
 	__u8 reserved[3];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_alloc_mw_resp {
 	__u32 mw_handle;
 	__u32 rkey;
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_dealloc_mw {
@@ -368,7 +401,7 @@
 };
 
 struct ib_uverbs_create_comp_channel {
-	__u64 response;
+	__aligned_u64 response;
 };
 
 struct ib_uverbs_create_comp_channel_resp {
@@ -376,28 +409,34 @@
 };
 
 struct ib_uverbs_create_cq {
-	__u64 response;
-	__u64 user_handle;
+	__aligned_u64 response;
+	__aligned_u64 user_handle;
 	__u32 cqe;
 	__u32 comp_vector;
 	__s32 comp_channel;
 	__u32 reserved;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
+};
+
+enum ib_uverbs_ex_create_cq_flags {
+	IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
+	IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1,
 };
 
 struct ib_uverbs_ex_create_cq {
-	__u64 user_handle;
+	__aligned_u64 user_handle;
 	__u32 cqe;
 	__u32 comp_vector;
 	__s32 comp_channel;
 	__u32 comp_mask;
-	__u32 flags;
+	__u32 flags;	/* bitmask of ib_uverbs_ex_create_cq_flags */
 	__u32 reserved;
 };
 
 struct ib_uverbs_create_cq_resp {
 	__u32 cq_handle;
 	__u32 cqe;
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_ex_create_cq_resp {
@@ -407,32 +446,32 @@
 };
 
 struct ib_uverbs_resize_cq {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 cq_handle;
 	__u32 cqe;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_resize_cq_resp {
 	__u32 cqe;
 	__u32 reserved;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_poll_cq {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 cq_handle;
 	__u32 ne;
 };
 
 struct ib_uverbs_wc {
-	__u64 wr_id;
+	__aligned_u64 wr_id;
 	__u32 status;
 	__u32 opcode;
 	__u32 vendor_err;
 	__u32 byte_len;
 	union {
-		__u32 imm_data;
+		__be32 imm_data;
 		__u32 invalidate_rkey;
 	} ex;
 	__u32 qp_num;
@@ -458,7 +497,7 @@
 };
 
 struct ib_uverbs_destroy_cq {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 cq_handle;
 	__u32 reserved;
 };
@@ -527,8 +566,8 @@
 };
 
 struct ib_uverbs_create_qp {
-	__u64 response;
-	__u64 user_handle;
+	__aligned_u64 response;
+	__aligned_u64 user_handle;
 	__u32 pd_handle;
 	__u32 send_cq_handle;
 	__u32 recv_cq_handle;
@@ -542,7 +581,7 @@
 	__u8 qp_type;
 	__u8 is_srq;
 	__u8 reserved;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 enum ib_uverbs_create_qp_mask {
@@ -553,8 +592,22 @@
 	IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE,
 };
 
+enum {
+	/*
+	 * This value is equal to IB_QP_DEST_QPN.
+	 */
+	IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20,
+};
+
+enum {
+	/*
+	 * This value is equal to IB_QP_RATE_LIMIT.
+	 */
+	IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25,
+};
+
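`IB_USER_LAST_QP_ATTR_MASK` acts as a sentinel for validating a user-supplied `attr_mask`: every legal bit lies at or below it, so `(MASK << 1) - 1` builds the set of all currently defined attribute bits. A hedged sketch of the check (the kernel's real validation lives in the uverbs command handlers):

/* Reject any attr_mask bit above the last attribute this ABI defines. */
static inline int qp_attr_mask_valid(__u64 attr_mask)
{
	return (attr_mask & ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) == 0;
}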
 struct ib_uverbs_ex_create_qp {
-	__u64 user_handle;
+	__aligned_u64 user_handle;
 	__u32 pd_handle;
 	__u32 send_cq_handle;
 	__u32 recv_cq_handle;
@@ -571,17 +624,17 @@
 	__u32 comp_mask;
 	__u32 create_flags;
 	__u32 rwq_ind_tbl_handle;
-	__u32 reserved1;
+	__u32 source_qpn;
 };
 
 struct ib_uverbs_open_qp {
-	__u64 response;
-	__u64 user_handle;
+	__aligned_u64 response;
+	__aligned_u64 user_handle;
 	__u32 pd_handle;
 	__u32 qpn;
 	__u8 qp_type;
 	__u8 reserved[7];
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 /* also used for open response */
@@ -594,6 +647,7 @@
 	__u32 max_recv_sge;
 	__u32 max_inline_data;
 	__u32 reserved;
+	__u32 driver_data[0];
 };
 
 struct ib_uverbs_ex_create_qp_resp {
@@ -622,10 +676,10 @@
 };
 
 struct ib_uverbs_query_qp {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 qp_handle;
 	__u32 attr_mask;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_query_qp_resp {
@@ -659,7 +713,7 @@
 	__u8 alt_timeout;
 	__u8 sq_sig_all;
 	__u8 reserved[5];
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_modify_qp {
@@ -689,14 +743,22 @@
 	__u8 alt_port_num;
 	__u8 alt_timeout;
 	__u8 reserved[2];
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
+};
+
+struct ib_uverbs_ex_modify_qp {
+	struct ib_uverbs_modify_qp base;
+	__u32 rate_limit;
+	__u32 reserved;
 };
 
-struct ib_uverbs_modify_qp_resp {
+struct ib_uverbs_ex_modify_qp_resp {
+	__u32 comp_mask;
+	__u32 response_length;
 };
 
 struct ib_uverbs_destroy_qp {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 qp_handle;
 	__u32 reserved;
 };
@@ -712,30 +774,48 @@
  * document the ABI.
  */
 struct ib_uverbs_sge {
-	__u64 addr;
+	__aligned_u64 addr;
 	__u32 length;
 	__u32 lkey;
 };
 
+enum ib_uverbs_wr_opcode {
+	IB_UVERBS_WR_RDMA_WRITE = 0,
+	IB_UVERBS_WR_RDMA_WRITE_WITH_IMM = 1,
+	IB_UVERBS_WR_SEND = 2,
+	IB_UVERBS_WR_SEND_WITH_IMM = 3,
+	IB_UVERBS_WR_RDMA_READ = 4,
+	IB_UVERBS_WR_ATOMIC_CMP_AND_SWP = 5,
+	IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD = 6,
+	IB_UVERBS_WR_LOCAL_INV = 7,
+	IB_UVERBS_WR_BIND_MW = 8,
+	IB_UVERBS_WR_SEND_WITH_INV = 9,
+	IB_UVERBS_WR_TSO = 10,
+	IB_UVERBS_WR_RDMA_READ_WITH_INV = 11,
+	IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12,
+	IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13,
+	/* Review enum ib_wr_opcode before modifying this */
+};
+
 struct ib_uverbs_send_wr {
-	__u64 wr_id;
+	__aligned_u64 wr_id;
 	__u32 num_sge;
-	__u32 opcode;
+	__u32 opcode;	/* see enum ib_uverbs_wr_opcode */
 	__u32 send_flags;
 	union {
-		__u32 imm_data;
+		__be32 imm_data;
 		__u32 invalidate_rkey;
 	} ex;
 	union {
 		struct {
-			__u64 remote_addr;
+			__aligned_u64 remote_addr;
 			__u32 rkey;
 			__u32 reserved;
 		} rdma;
 		struct {
-			__u64 remote_addr;
-			__u64 compare_add;
-			__u64 swap;
+			__aligned_u64 remote_addr;
+			__aligned_u64 compare_add;
+			__aligned_u64 swap;
 			__u32 rkey;
 			__u32 reserved;
 		} atomic;
@@ -749,7 +829,7 @@
 };
 
 struct ib_uverbs_post_send {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 qp_handle;
 	__u32 wr_count;
 	__u32 sge_count;
@@ -762,13 +842,13 @@
 };
 
 struct ib_uverbs_recv_wr {
-	__u64 wr_id;
+	__aligned_u64 wr_id;
 	__u32 num_sge;
 	__u32 reserved;
 };
 
 struct ib_uverbs_post_recv {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 qp_handle;
 	__u32 wr_count;
 	__u32 sge_count;
@@ -781,7 +861,7 @@
 };
 
 struct ib_uverbs_post_srq_recv {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 srq_handle;
 	__u32 wr_count;
 	__u32 sge_count;
@@ -794,15 +874,17 @@
 };
 
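The post_send/post_recv commands are variable length: the fixed header is followed by `wr_count` work-request records and then `sge_count` scatter/gather entries (in the full header, outside this hunk's context, the struct also carries a `wqe_size` and a trailing WR array). A hedged sketch of the classic write() length computation, assuming that back-to-back layout:

#include <stddef.h>

/* Total length of a write()-based post_send: header, then wr_count
 * send WRs, then sge_count SGEs appended back to back.
 */
static inline size_t post_send_len(__u32 wr_count, __u32 sge_count)
{
	return sizeof(struct ib_uverbs_post_send) +
	    (size_t)wr_count * sizeof(struct ib_uverbs_send_wr) +
	    (size_t)sge_count * sizeof(struct ib_uverbs_sge);
}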
 struct ib_uverbs_create_ah {
-	__u64 response;
-	__u64 user_handle;
+	__aligned_u64 response;
+	__aligned_u64 user_handle;
 	__u32 pd_handle;
 	__u32 reserved;
 	struct ib_uverbs_ah_attr attr;
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_create_ah_resp {
 	__u32 ah_handle;
+	__u32 driver_data[0];
 };
 
 struct ib_uverbs_destroy_ah {
@@ -814,7 +896,7 @@
 	__u32 qp_handle;
 	__u16 mlid;
 	__u16 reserved;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_detach_mcast {
@@ -822,7 +904,7 @@
 	__u32 qp_handle;
 	__u16 mlid;
 	__u16 reserved;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_flow_spec_hdr {
@@ -830,7 +912,7 @@
 	__u16 size;
 	__u16 reserved;
 	/* followed by flow_spec */
-	__u64 flow_spec_data[0];
+	__aligned_u64 flow_spec_data[0];
 };
 
 struct ib_uverbs_flow_eth_filter {
@@ -916,6 +998,141 @@
 	struct ib_uverbs_flow_ipv6_filter mask;
 };
 
+struct ib_uverbs_flow_spec_action_tag {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+	__u32 tag_id;
+	__u32 reserved1;
+};
+
+struct ib_uverbs_flow_spec_action_drop {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+};
+
+struct ib_uverbs_flow_spec_action_handle {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+	__u32 handle;
+	__u32 reserved1;
+};
+
+struct ib_uverbs_flow_spec_action_count {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+	__u32 handle;
+	__u32 reserved1;
+};
+
+struct ib_uverbs_flow_tunnel_filter {
+	__be32 tunnel_id;
+};
+
+struct ib_uverbs_flow_spec_tunnel {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+	struct ib_uverbs_flow_tunnel_filter val;
+	struct ib_uverbs_flow_tunnel_filter mask;
+};
+
+struct ib_uverbs_flow_spec_esp_filter {
+	__u32 spi;
+	__u32 seq;
+};
+
+struct ib_uverbs_flow_spec_esp {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+	struct ib_uverbs_flow_spec_esp_filter val;
+	struct ib_uverbs_flow_spec_esp_filter mask;
+};
+
+struct ib_uverbs_flow_gre_filter {
+	/* c_ks_res0_ver field is bits 0-15 in offset 0 of a standard GRE header:
+	 * bit 0 - C - checksum bit.
+	 * bit 1 - reserved. set to 0.
+	 * bit 2 - key bit.
+	 * bit 3 - sequence number bit.
+	 * bits 4:12 - reserved. set to 0.
+	 * bits 13:15 - GRE version.
+	 */
+	__be16 c_ks_res0_ver;
+	__be16 protocol;
+	__be32 key;
+};
+
+struct ib_uverbs_flow_spec_gre {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+	struct ib_uverbs_flow_gre_filter val;
+	struct ib_uverbs_flow_gre_filter mask;
+};
+
+struct ib_uverbs_flow_mpls_filter {
+	/* The field includes the entire MPLS label:
+	 * bits 0:19 - label field.
+	 * bits 20:22 - traffic class field.
+	 * bits 23 - bottom of stack bit.
+	 * bits 24:31 - ttl field.
+	 */
+	__be32 label;
+};
+
+struct ib_uverbs_flow_spec_mpls {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+	struct ib_uverbs_flow_mpls_filter val;
+	struct ib_uverbs_flow_mpls_filter mask;
+};
+
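The comment on `ib_uverbs_flow_mpls_filter` numbers bits from the most significant end of the 32-bit word, which is the standard MPLS shim layout. A hedged sketch of unpacking the big-endian field on FreeBSD (be32toh comes from sys/endian.h):

#include <sys/endian.h>
#include <stdint.h>

/* Standard MPLS shim after byte swap: label(20) | tc(3) | bos(1) | ttl(8). */
static inline void mpls_unpack(uint32_t be_label, uint32_t *label,
    uint8_t *tc, uint8_t *bos, uint8_t *ttl)
{
	uint32_t v = be32toh(be_label);

	*label = v >> 12;
	*tc = (v >> 9) & 0x7;
	*bos = (v >> 8) & 0x1;
	*ttl = v & 0xff;
}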
 struct ib_uverbs_flow_attr {
 	__u32 type;
 	__u16 size;
@@ -948,27 +1165,27 @@
 };
 
 struct ib_uverbs_create_srq {
-	__u64 response;
-	__u64 user_handle;
+	__aligned_u64 response;
+	__aligned_u64 user_handle;
 	__u32 pd_handle;
 	__u32 max_wr;
 	__u32 max_sge;
 	__u32 srq_limit;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_create_xsrq {
-	__u64 response;
-	__u64 user_handle;
+	__aligned_u64 response;
+	__aligned_u64 user_handle;
 	__u32 srq_type;
 	__u32 pd_handle;
 	__u32 max_wr;
 	__u32 max_sge;
 	__u32 srq_limit;
-	__u32 reserved;
+	__u32 max_num_tags;
 	__u32 xrcd_handle;
 	__u32 cq_handle;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_create_srq_resp {
@@ -976,6 +1193,7 @@
 	__u32 max_wr;
 	__u32 max_sge;
 	__u32 srqn;
+	__u32 driver_data[0];
 };
 
 struct ib_uverbs_modify_srq {
@@ -983,14 +1201,14 @@
 	__u32 attr_mask;
 	__u32 max_wr;
 	__u32 srq_limit;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_query_srq {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 srq_handle;
 	__u32 reserved;
-	__u64 driver_data[0];
+	__aligned_u64 driver_data[0];
 };
 
 struct ib_uverbs_query_srq_resp {
@@ -1001,7 +1219,7 @@
 };
 
 struct ib_uverbs_destroy_srq {
-	__u64 response;
+	__aligned_u64 response;
 	__u32 srq_handle;
 	__u32 reserved;
 };
@@ -1013,11 +1231,13 @@
 struct ib_uverbs_ex_create_wq {
 	__u32 comp_mask;
 	__u32 wq_type;
-	__u64 user_handle;
+	__aligned_u64 user_handle;
 	__u32 pd_handle;
 	__u32 cq_handle;
 	__u32 max_wr;
 	__u32 max_sge;
+	__u32 create_flags;	/* Use enum ib_wq_flags */
+	__u32 reserved;
 };
 
 struct ib_uverbs_ex_create_wq_resp {
@@ -1046,6 +1266,8 @@
 	__u32 wq_handle;
 	__u32 wq_state;
 	__u32 curr_wq_state;
+	__u32 flags;		/* Use enum ib_wq_flags */
+	__u32 flags_mask;	/* Use enum ib_wq_flags */
 };
 
 /* Prevent memory allocation rather than max expected size */
@@ -1072,4 +1294,18 @@
 	__u32 ind_tbl_handle;
 };
 
+struct ib_uverbs_cq_moderation {
+	__u16 cq_count;
+	__u16 cq_period;
+};
+
+struct ib_uverbs_ex_modify_cq {
+	__u32 cq_handle;
+	__u32 attr_mask;
+	struct ib_uverbs_cq_moderation attr;
+	__u32 reserved;
+};
+
+#define IB_DEVICE_NAME_MAX 64
+
 #endif /* IB_USER_VERBS_H */
diff --git a/sys/ofed/include/uapi/rdma/mlx5-abi.h b/sys/ofed/include/uapi/rdma/mlx5-abi.h
--- a/sys/ofed/include/uapi/rdma/mlx5-abi.h
+++ b/sys/ofed/include/uapi/rdma/mlx5-abi.h
@@ -281,4 +281,22 @@
 	__u32 comp_mask;
 	__u32 reserved;
 };
+
+enum mlx5_ib_mmap_cmd {
+	MLX5_IB_MMAP_REGULAR_PAGE = 0,
+	MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
+	MLX5_IB_MMAP_WC_PAGE = 2,
+	MLX5_IB_MMAP_NC_PAGE = 3,
+	/* 5 is chosen in order to be compatible with old versions of libmlx5 */
+	MLX5_IB_MMAP_CORE_CLOCK = 5,
+	MLX5_IB_MMAP_ALLOC_WC = 6,
+	MLX5_IB_MMAP_CLOCK_INFO = 7,
+	MLX5_IB_MMAP_DEVICE_MEM = 8,
+};
+
+/* Bit indexes for the mlx5_alloc_ucontext_resp.clock_info_versions bitmap */
+enum {
+	MLX5_IB_CLOCK_INFO_V1 = 0,
+};
+
 #endif /* MLX5_ABI_USER_H */
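The `mlx5_ib_mmap_cmd` values are command codes rather than flags: the mlx5 provider encodes the command in the mmap file offset and the driver decodes it from the page offset. A hedged sketch of the user-space side, assuming the page-number encoding used by libmlx5:

#include <sys/mman.h>
#include <unistd.h>

/* Hedged: encode the command in the offset's page number; fd is the
 * open uverbs device file descriptor.
 */
static void *mlx5_mmap_cmd(int fd, int cmd, size_t len)
{
	off_t offset = (off_t)cmd * (off_t)getpagesize();

	return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
	    offset);
}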
diff --git a/sys/ofed/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/sys/ofed/include/uapi/rdma/mlx5_user_ioctl_cmds.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_USER_IOCTL_CMDS_H
+#define MLX5_USER_IOCTL_CMDS_H
+
+#include
+#include
+
+enum mlx5_ib_create_flow_action_attrs {
+	/* This attribute belongs to the driver namespace */
+	MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_alloc_dm_attrs {
+	MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+	MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
+};
+
+enum mlx5_ib_devx_methods {
+	MLX5_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DEVX_QUERY_UAR,
+	MLX5_IB_METHOD_DEVX_QUERY_EQN,
+	MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
+};
+
+enum mlx5_ib_devx_other_attrs {
+	MLX5_IB_ATTR_DEVX_OTHER_CMD_IN = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_create_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+};
+
+enum mlx5_ib_devx_query_uar_attrs {
+	MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+};
+
+enum mlx5_ib_devx_obj_destroy_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_obj_modify_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_query_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_query_async_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
+};
+
+enum mlx5_ib_devx_subscribe_event_attrs {
+	MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
+	MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+	MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
+	MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
+};
+
+enum mlx5_ib_devx_query_eqn_attrs {
+	MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+};
+
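Every driver-private attribute and method ID above starts at `(1U << UVERBS_ID_NS_SHIFT)`: the 16-bit ID space is split into a namespace part in the high bits and an ordinal in the low bits, so mlx5-specific IDs cannot collide with the common verbs IDs in namespace 0. A sketch, assuming the Linux uapi value `UVERBS_ID_NS_SHIFT == 12`:

/* Assumed: UVERBS_ID_NS_SHIFT == 12, as in the Linux uapi headers. */
#define ID_NS(id)      ((id) >> 12)   /* 0 = common verbs, 1 = driver */
#define ID_ORDINAL(id) ((id) & 0xfff) /* position within the namespace */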
+enum mlx5_ib_devx_obj_methods {
+	MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+	MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+	MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
+};
+
+enum mlx5_ib_var_alloc_attrs {
+	MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
+	MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
+	MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
+};
+
+enum mlx5_ib_var_obj_destroy_attrs {
+	MLX5_IB_ATTR_VAR_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_var_obj_methods {
+	MLX5_IB_METHOD_VAR_OBJ_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_VAR_OBJ_DESTROY,
+};
+
+enum mlx5_ib_uar_alloc_attrs {
+	MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE,
+	MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
+	MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
+	MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
+};
+
+enum mlx5_ib_uar_obj_destroy_attrs {
+	MLX5_IB_ATTR_UAR_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_uar_obj_methods {
+	MLX5_IB_METHOD_UAR_OBJ_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_UAR_OBJ_DESTROY,
+};
+
+enum mlx5_ib_devx_umem_reg_attrs {
+	MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+};
+
+enum mlx5_ib_devx_umem_dereg_attrs {
+	MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_pp_obj_methods {
+	MLX5_IB_METHOD_PP_OBJ_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_PP_OBJ_DESTROY,
+};
+
+enum mlx5_ib_pp_alloc_attrs {
+	MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX,
+	MLX5_IB_ATTR_PP_OBJ_ALLOC_FLAGS,
+	MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX,
+};
+
+enum mlx5_ib_pp_obj_destroy_attrs {
+	MLX5_IB_ATTR_PP_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_umem_methods {
+	MLX5_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+};
+
+enum mlx5_ib_devx_async_cmd_fd_alloc_attrs {
+	MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_async_event_fd_alloc_attrs {
+	MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+};
+
+enum mlx5_ib_devx_async_cmd_fd_methods {
+	MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_async_event_fd_methods {
+	MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_objects {
+	MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_OBJECT_DEVX_OBJ,
+	MLX5_IB_OBJECT_DEVX_UMEM,
+	MLX5_IB_OBJECT_FLOW_MATCHER,
+	MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+	MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+	MLX5_IB_OBJECT_VAR,
+	MLX5_IB_OBJECT_PP,
+	MLX5_IB_OBJECT_UAR,
+};
+
+enum mlx5_ib_flow_matcher_create_attrs {
+	MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+	MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+	MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+	MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+	MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
+};
+
+enum mlx5_ib_flow_matcher_destroy_attrs {
+	MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_flow_matcher_methods {
+	MLX5_IB_METHOD_FLOW_MATCHER_CREATE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
+};
+
+#define MLX5_IB_DW_MATCH_PARAM 0x80
+
+struct mlx5_ib_match_params {
+	__u32 match_params[MLX5_IB_DW_MATCH_PARAM];
+};
+
+enum mlx5_ib_flow_type {
+	MLX5_IB_FLOW_TYPE_NORMAL,
+	MLX5_IB_FLOW_TYPE_SNIFFER,
+	MLX5_IB_FLOW_TYPE_ALL_DEFAULT,
+	MLX5_IB_FLOW_TYPE_MC_DEFAULT,
+};
+
+enum mlx5_ib_create_flow_attrs {
+	MLX5_IB_ATTR_CREATE_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+	MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
+	MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+	MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+	MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+	MLX5_IB_ATTR_CREATE_FLOW_TAG,
+	MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
+	MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+};
+
+enum mlx5_ib_destoy_flow_attrs {
+	MLX5_IB_ATTR_DESTROY_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_flow_methods {
+	MLX5_IB_METHOD_CREATE_FLOW = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DESTROY_FLOW,
+};
+
+enum mlx5_ib_flow_action_methods {
+	MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+};
+
+enum mlx5_ib_create_flow_action_create_modify_header_attrs {
+	MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+	MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+};
+
+enum mlx5_ib_create_flow_action_create_packet_reformat_attrs {
+	MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+	MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+	MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+};
+
+#endif
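Taken together with the generic `ib_uverbs_ioctl_hdr`/`ib_uverbs_attr` layout added in rdma_user_ioctl_cmds.h below, these IDs describe complete calls: one ioctl carries an object ID, a method ID, and a counted attribute array. A hedged sketch of a DEVX object-create invocation (attribute payload setup elided; the wrapper-struct pattern mirrors what rdma-core generates):

#include <string.h>
#include <sys/ioctl.h>

/* Hedged sketch: issue MLX5_IB_METHOD_DEVX_OBJ_CREATE through the
 * generic RDMA_VERBS_IOCTL; fd is the open uverbs device.
 */
static int devx_obj_create(int fd)
{
	struct {
		struct ib_uverbs_ioctl_hdr hdr;
		struct ib_uverbs_attr attrs[3];
	} cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.hdr.length = sizeof(cmd);
	cmd.hdr.object_id = MLX5_IB_OBJECT_DEVX_OBJ;
	cmd.hdr.method_id = MLX5_IB_METHOD_DEVX_OBJ_CREATE;
	cmd.hdr.num_attrs = 3;
	cmd.hdr.driver_id = RDMA_DRIVER_MLX5;
	cmd.attrs[0].attr_id = MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE;
	cmd.attrs[1].attr_id = MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN;
	cmd.attrs[2].attr_id = MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT;
	/* ... fill each attribute's len/flags/data per the method ... */

	return ioctl(fd, RDMA_VERBS_IOCTL, &cmd);
}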
diff --git a/sys/ofed/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/sys/ofed/include/uapi/rdma/mlx5_user_ioctl_verbs.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_USER_IOCTL_VERBS_H
+#define MLX5_USER_IOCTL_VERBS_H
+
+#include
+
+enum mlx5_ib_uapi_flow_action_flags {
+	MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0,
+};
+
+enum mlx5_ib_uapi_flow_table_type {
+	MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0,
+	MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1,
+	MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB = 0x2,
+	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX = 0x3,
+	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX = 0x4,
+};
+
+enum mlx5_ib_uapi_flow_action_packet_reformat_type {
+	MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 = 0x0,
+	MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x1,
+	MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x2,
+	MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3,
+};
+
+struct mlx5_ib_uapi_devx_async_cmd_hdr {
+	__aligned_u64 wr_id;
+	__u8 out_data[];
+};
+
+enum mlx5_ib_uapi_dm_type {
+	MLX5_IB_UAPI_DM_TYPE_MEMIC,
+	MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM,
+	MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM,
+};
+
+enum mlx5_ib_uapi_devx_create_event_channel_flags {
+	MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA = 1 << 0,
+};
+
+struct mlx5_ib_uapi_devx_async_event_hdr {
+	__aligned_u64 cookie;
+	__u8 out_data[];
+};
+
+enum mlx5_ib_uapi_pp_alloc_flags {
+	MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX = 1 << 0,
+};
+
+enum mlx5_ib_uapi_uar_alloc_type {
+	MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF = 0x0,
+	MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC = 0x1,
+};
+
+#endif
+
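`mlx5_ib_uapi_devx_async_cmd_hdr` describes what a read() from the DEVX async command FD returns: the `wr_id` supplied at submission time, immediately followed by the raw command output. A hedged consumer sketch (buffer sizing is the caller's responsibility):

#include <unistd.h>

/* Hedged sketch: one completed async query per read(); hdr->wr_id
 * identifies the request, hdr->out_data[] holds the device's output.
 */
static ssize_t devx_async_read(int fd,
    struct mlx5_ib_uapi_devx_async_cmd_hdr *hdr, size_t len)
{
	return read(fd, hdr, len);
}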
diff --git a/sys/ofed/include/uapi/rdma/rdma_user_ioctl.h b/sys/ofed/include/uapi/rdma/rdma_user_ioctl.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/uapi/rdma/rdma_user_ioctl.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2016 Mellanox Technologies, LTD. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_USER_IOCTL_H
+#define RDMA_USER_IOCTL_H
+
+#ifdef _KERNEL
+#include
+#include
+#else
+#include
+#include
+#endif
+
+#include
+#include
+
+/* Legacy name, for user space applications that already use it */
+#define IB_IOCTL_MAGIC		RDMA_IOCTL_MAGIC
+
+/*
+ * General blocks assignments.
+ * It is closed on purpose; do not expose it to user space.
+ * #define MAD_CMD_BASE		0x00
+ */
+
+/* MAD specific section */
+#define IB_USER_MAD_REGISTER_AGENT	_IOWR(RDMA_IOCTL_MAGIC, 0x01, struct ib_user_mad_reg_req)
+#define IB_USER_MAD_UNREGISTER_AGENT	_IOW(RDMA_IOCTL_MAGIC, 0x02, __u32)
+#define IB_USER_MAD_ENABLE_PKEY		_IO(RDMA_IOCTL_MAGIC, 0x03)
+#define IB_USER_MAD_REGISTER_AGENT2	_IOWR(RDMA_IOCTL_MAGIC, 0x04, struct ib_user_mad_reg_req2)
+
+#endif /* RDMA_USER_IOCTL_H */
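Note that moving the MAD ioctls out of ib_user_mad.h is compatibility-neutral: `IB_IOCTL_MAGIC` becomes an alias for `RDMA_IOCTL_MAGIC` (both 0x1b) and the request codes keep their old values, merely rewritten in hex. A minimal user-space call is unchanged (sketch, not part of the patch):

#include <sys/ioctl.h>

/* Unchanged usage: register a MAD agent on an open umad device fd. */
static int umad_register(int fd, struct ib_user_mad_reg_req *req)
{
	return ioctl(fd, IB_USER_MAD_REGISTER_AGENT, req);
}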
diff --git a/sys/ofed/include/uapi/rdma/rdma_user_ioctl_cmds.h b/sys/ofed/include/uapi/rdma/rdma_user_ioctl_cmds.h
new file mode 100644
--- /dev/null
+++ b/sys/ofed/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_USER_IOCTL_CMDS_H
+#define RDMA_USER_IOCTL_CMDS_H
+
+#ifdef _KERNEL
+#include
+#include
+#else
+#include
+#include
+#endif
+
+/* Documentation/ioctl/ioctl-number.rst */
+#define RDMA_IOCTL_MAGIC	0x1b
+#define RDMA_VERBS_IOCTL \
+	_IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr)
+
+enum {
+	/* User input */
+	UVERBS_ATTR_F_MANDATORY = 1U << 0,
+	/*
+	 * Valid output bit should be ignored and considered set in
+	 * mandatory fields. This bit is kernel output.
+	 */
+	UVERBS_ATTR_F_VALID_OUTPUT = 1U << 1,
+};
+
+struct ib_uverbs_attr {
+	__u16 attr_id;		/* command specific type attribute */
+	__u16 len;		/* only for pointers and IDRs array */
+	__u16 flags;		/* combination of UVERBS_ATTR_F_XXXX */
+	union {
+		struct {
+			__u8 elem_id;
+			__u8 reserved;
+		} enum_data;
+		__u16 reserved;
+	} attr_data;
+	union {
+		/*
+		 * ptr to command, inline data, idr/fd or
+		 * ptr to __u32 array of IDRs
+		 */
+		__aligned_u64 data;
+		/* Used by FD_IN and FD_OUT */
+		__s64 data_s64;
+	};
+};
+
+struct ib_uverbs_ioctl_hdr {
+	__u16 length;
+	__u16 object_id;
+	__u16 method_id;
+	__u16 num_attrs;
+	__aligned_u64 reserved1;
+	__u32 driver_id;
+	__u32 reserved2;
+	struct ib_uverbs_attr attrs[0];
+};
+
+#endif
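The `length` field must cover the fixed header plus the inline attribute array; the kernel rejects anything else, so the computation is mechanical (a hedged sketch):

#include <stddef.h>

/* Total ioctl payload: fixed header plus num_attrs inline attributes. */
static inline size_t uverbs_ioctl_len(unsigned int num_attrs)
{
	return sizeof(struct ib_uverbs_ioctl_hdr) +
	    num_attrs * sizeof(struct ib_uverbs_attr);
}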