Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucm.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucm.c (revision 323650) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucm.c (revision 323651) @@ -1,1371 +1,1370 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_AUTHOR("Libor Michalek"); MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); MODULE_LICENSE("Dual BSD/GPL"); struct ib_ucm_device { int devnum; struct cdev cdev; struct device dev; struct ib_device *ib_dev; }; struct ib_ucm_file { struct mutex file_mutex; struct file *filp; struct ib_ucm_device *device; struct list_head ctxs; struct list_head events; wait_queue_head_t poll_wait; }; struct ib_ucm_context { int id; struct completion comp; atomic_t ref; int events_reported; struct ib_ucm_file *file; struct ib_cm_id *cm_id; __u64 uid; struct list_head events; /* list of pending events. */ struct list_head file_list; /* member in file ctx list */ }; struct ib_ucm_event { struct ib_ucm_context *ctx; struct list_head file_list; /* member in file event list */ struct list_head ctx_list; /* member in ctx event list */ struct ib_cm_id *cm_id; struct ib_ucm_event_resp resp; void *data; void *info; int data_len; int info_len; }; enum { IB_UCM_MAJOR = 231, IB_UCM_BASE_MINOR = 224, IB_UCM_MAX_DEVICES = 32 }; #define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) static void ib_ucm_add_one(struct ib_device *device); static void ib_ucm_remove_one(struct ib_device *device, void *client_data); static struct ib_client ucm_client = { .name = "ucm", .add = ib_ucm_add_one, .remove = ib_ucm_remove_one }; static DEFINE_MUTEX(ctx_id_mutex); static DEFINE_IDR(ctx_id_table); static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { struct ib_ucm_context *ctx; mutex_lock(&ctx_id_mutex); ctx = idr_find(&ctx_id_table, id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); else atomic_inc(&ctx->ref); mutex_unlock(&ctx_id_mutex); return ctx; } static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) { if (atomic_dec_and_test(&ctx->ref)) complete(&ctx->comp); } static inline int ib_ucm_new_cm_id(int event) { return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED; } static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) { struct ib_ucm_event *uevent; mutex_lock(&ctx->file->file_mutex); list_del(&ctx->file_list); while (!list_empty(&ctx->events)) { uevent = list_entry(ctx->events.next, struct ib_ucm_event, ctx_list); list_del(&uevent->file_list); list_del(&uevent->ctx_list); mutex_unlock(&ctx->file->file_mutex); /* clear incoming connections. */ if (ib_ucm_new_cm_id(uevent->resp.event)) ib_destroy_cm_id(uevent->cm_id); kfree(uevent); mutex_lock(&ctx->file->file_mutex); } mutex_unlock(&ctx->file->file_mutex); } static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) { struct ib_ucm_context *ctx; ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) return NULL; atomic_set(&ctx->ref, 1); init_completion(&ctx->comp); ctx->file = file; INIT_LIST_HEAD(&ctx->events); mutex_lock(&ctx_id_mutex); ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL); mutex_unlock(&ctx_id_mutex); if (ctx->id < 0) goto error; list_add_tail(&ctx->file_list, &file->ctxs); return ctx; error: kfree(ctx); return NULL; } static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, struct ib_cm_req_event_param *kreq) { ureq->remote_ca_guid = kreq->remote_ca_guid; ureq->remote_qkey = kreq->remote_qkey; ureq->remote_qpn = kreq->remote_qpn; ureq->qp_type = kreq->qp_type; ureq->starting_psn = kreq->starting_psn; ureq->responder_resources = kreq->responder_resources; ureq->initiator_depth = kreq->initiator_depth; ureq->local_cm_response_timeout = kreq->local_cm_response_timeout; ureq->flow_control = kreq->flow_control; ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout; ureq->retry_count = kreq->retry_count; ureq->rnr_retry_count = kreq->rnr_retry_count; ureq->srq = kreq->srq; ureq->port = kreq->port; ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path); if (kreq->alternate_path) ib_copy_path_rec_to_user(&ureq->alternate_path, kreq->alternate_path); } static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, struct ib_cm_rep_event_param *krep) { urep->remote_ca_guid = krep->remote_ca_guid; urep->remote_qkey = krep->remote_qkey; urep->remote_qpn = krep->remote_qpn; urep->starting_psn = krep->starting_psn; urep->responder_resources = krep->responder_resources; urep->initiator_depth = krep->initiator_depth; urep->target_ack_delay = krep->target_ack_delay; urep->failover_accepted = krep->failover_accepted; urep->flow_control = krep->flow_control; urep->rnr_retry_count = krep->rnr_retry_count; urep->srq = krep->srq; } static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, struct ib_cm_sidr_rep_event_param *krep) { urep->status = krep->status; urep->qkey = krep->qkey; urep->qpn = krep->qpn; }; static int ib_ucm_event_process(struct ib_cm_event *evt, struct ib_ucm_event *uvt) { void *info = NULL; switch (evt->event) { case IB_CM_REQ_RECEIVED: ib_ucm_event_req_get(&uvt->resp.u.req_resp, &evt->param.req_rcvd); uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE; uvt->resp.present = IB_UCM_PRES_PRIMARY; uvt->resp.present |= (evt->param.req_rcvd.alternate_path ? IB_UCM_PRES_ALTERNATE : 0); break; case IB_CM_REP_RECEIVED: ib_ucm_event_rep_get(&uvt->resp.u.rep_resp, &evt->param.rep_rcvd); uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE; break; case IB_CM_RTU_RECEIVED: uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; break; case IB_CM_DREQ_RECEIVED: uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; break; case IB_CM_DREP_RECEIVED: uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; break; case IB_CM_MRA_RECEIVED: uvt->resp.u.mra_resp.timeout = evt->param.mra_rcvd.service_timeout; uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE; break; case IB_CM_REJ_RECEIVED: uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason; uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.rej_rcvd.ari_length; info = evt->param.rej_rcvd.ari; break; case IB_CM_LAP_RECEIVED: ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path, evt->param.lap_rcvd.alternate_path); uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; uvt->resp.present = IB_UCM_PRES_ALTERNATE; break; case IB_CM_APR_RECEIVED: uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status; uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.apr_rcvd.info_len; info = evt->param.apr_rcvd.apr_info; break; case IB_CM_SIDR_REQ_RECEIVED: uvt->resp.u.sidr_req_resp.pkey = evt->param.sidr_req_rcvd.pkey; uvt->resp.u.sidr_req_resp.port = evt->param.sidr_req_rcvd.port; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; case IB_CM_SIDR_REP_RECEIVED: ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp, &evt->param.sidr_rep_rcvd); uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.sidr_rep_rcvd.info_len; info = evt->param.sidr_rep_rcvd.info; break; default: uvt->resp.u.send_status = evt->param.send_status; break; } if (uvt->data_len) { uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL); if (!uvt->data) goto err1; uvt->resp.present |= IB_UCM_PRES_DATA; } if (uvt->info_len) { uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL); if (!uvt->info) goto err2; uvt->resp.present |= IB_UCM_PRES_INFO; } return 0; err2: kfree(uvt->data); err1: return -ENOMEM; } static int ib_ucm_event_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { struct ib_ucm_event *uevent; struct ib_ucm_context *ctx; int result = 0; ctx = cm_id->context; uevent = kzalloc(sizeof *uevent, GFP_KERNEL); if (!uevent) goto err1; uevent->ctx = ctx; uevent->cm_id = cm_id; uevent->resp.uid = ctx->uid; uevent->resp.id = ctx->id; uevent->resp.event = event->event; result = ib_ucm_event_process(event, uevent); if (result) goto err2; mutex_lock(&ctx->file->file_mutex); list_add_tail(&uevent->file_list, &ctx->file->events); list_add_tail(&uevent->ctx_list, &ctx->events); wake_up_interruptible(&ctx->file->poll_wait); - linux_poll_wakeup(ctx->file->filp); mutex_unlock(&ctx->file->file_mutex); return 0; err2: kfree(uevent); err1: /* Destroy new cm_id's */ return ib_ucm_new_cm_id(event->event); } static ssize_t ib_ucm_event(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_context *ctx; struct ib_ucm_event_get cmd; struct ib_ucm_event *uevent; int result = 0; if (out_len < sizeof(struct ib_ucm_event_resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&file->file_mutex); while (list_empty(&file->events)) { mutex_unlock(&file->file_mutex); if (file->filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, !list_empty(&file->events))) return -ERESTARTSYS; mutex_lock(&file->file_mutex); } uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); if (ib_ucm_new_cm_id(uevent->resp.event)) { ctx = ib_ucm_ctx_alloc(file); if (!ctx) { result = -ENOMEM; goto done; } ctx->cm_id = uevent->cm_id; ctx->cm_id->context = ctx; uevent->resp.id = ctx->id; } if (copy_to_user((void __user *)(unsigned long)cmd.response, &uevent->resp, sizeof(uevent->resp))) { result = -EFAULT; goto done; } if (uevent->data) { if (cmd.data_len < uevent->data_len) { result = -ENOMEM; goto done; } if (copy_to_user((void __user *)(unsigned long)cmd.data, uevent->data, uevent->data_len)) { result = -EFAULT; goto done; } } if (uevent->info) { if (cmd.info_len < uevent->info_len) { result = -ENOMEM; goto done; } if (copy_to_user((void __user *)(unsigned long)cmd.info, uevent->info, uevent->info_len)) { result = -EFAULT; goto done; } } list_del(&uevent->file_list); list_del(&uevent->ctx_list); uevent->ctx->events_reported++; kfree(uevent->data); kfree(uevent->info); kfree(uevent); done: mutex_unlock(&file->file_mutex); return result; } static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_create_id cmd; struct ib_ucm_create_id_resp resp; struct ib_ucm_context *ctx; int result; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&file->file_mutex); ctx = ib_ucm_ctx_alloc(file); mutex_unlock(&file->file_mutex); if (!ctx) return -ENOMEM; ctx->uid = cmd.uid; ctx->cm_id = ib_create_cm_id(file->device->ib_dev, ib_ucm_event_handler, ctx); if (IS_ERR(ctx->cm_id)) { result = PTR_ERR(ctx->cm_id); goto err1; } resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { result = -EFAULT; goto err2; } return 0; err2: ib_destroy_cm_id(ctx->cm_id); err1: mutex_lock(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); mutex_unlock(&ctx_id_mutex); kfree(ctx); return result; } static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_destroy_id cmd; struct ib_ucm_destroy_id_resp resp; struct ib_ucm_context *ctx; int result = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&ctx_id_mutex); ctx = idr_find(&ctx_id_table, cmd.id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); else idr_remove(&ctx_id_table, ctx->id); mutex_unlock(&ctx_id_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); ib_ucm_ctx_put(ctx); wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the cm_id. */ ib_destroy_cm_id(ctx->cm_id); /* Cleanup events not yet reported to the user. */ ib_ucm_cleanup_events(ctx); resp.events_reported = ctx->events_reported; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) result = -EFAULT; kfree(ctx); return result; } static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_attr_id_resp resp; struct ib_ucm_attr_id cmd; struct ib_ucm_context *ctx; int result = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); resp.service_id = ctx->cm_id->service_id; resp.service_mask = ctx->cm_id->service_mask; resp.local_id = ctx->cm_id->local_id; resp.remote_id = ctx->cm_id->remote_id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) result = -EFAULT; ib_ucm_ctx_put(ctx); return result; } static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_uverbs_qp_attr resp; struct ib_ucm_init_qp_attr cmd; struct ib_ucm_context *ctx; struct ib_qp_attr qp_attr; int result = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); if (result) goto out; ib_copy_qp_attr_to_user(&resp, &qp_attr); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) result = -EFAULT; out: ib_ucm_ctx_put(ctx); return result; } static int ucm_validate_listen(__be64 service_id, __be64 service_mask) { service_id &= service_mask; if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) || ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID)) return -EINVAL; return 0; } static ssize_t ib_ucm_listen(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_listen cmd; struct ib_ucm_context *ctx; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); result = ucm_validate_listen(cmd.service_id, cmd.service_mask); if (result) goto out; result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask); out: ib_ucm_ctx_put(ctx); return result; } static ssize_t ib_ucm_notify(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_notify cmd; struct ib_ucm_context *ctx; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event); ib_ucm_ctx_put(ctx); return result; } static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) { void *data; *dest = NULL; if (!len) return 0; data = memdup_user((void __user *)(unsigned long)src, len); if (IS_ERR(data)) return PTR_ERR(data); *dest = data; return 0; } static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) { struct ib_user_path_rec upath; struct ib_sa_path_rec *sa_path; *path = NULL; if (!src) return 0; sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL); if (!sa_path) return -ENOMEM; if (copy_from_user(&upath, (void __user *)(unsigned long)src, sizeof(upath))) { kfree(sa_path); return -EFAULT; } ib_copy_path_rec_from_user(sa_path, &upath); *path = sa_path; return 0; } static ssize_t ib_ucm_send_req(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_req_param param; struct ib_ucm_context *ctx; struct ib_ucm_req cmd; int result; param.private_data = NULL; param.primary_path = NULL; param.alternate_path = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); if (result) goto done; result = ib_ucm_path_get(¶m.primary_path, cmd.primary_path); if (result) goto done; result = ib_ucm_path_get(¶m.alternate_path, cmd.alternate_path); if (result) goto done; param.private_data_len = cmd.len; param.service_id = cmd.sid; param.qp_num = cmd.qpn; param.qp_type = cmd.qp_type; param.starting_psn = cmd.psn; param.peer_to_peer = cmd.peer_to_peer; param.responder_resources = cmd.responder_resources; param.initiator_depth = cmd.initiator_depth; param.remote_cm_response_timeout = cmd.remote_cm_response_timeout; param.flow_control = cmd.flow_control; param.local_cm_response_timeout = cmd.local_cm_response_timeout; param.retry_count = cmd.retry_count; param.rnr_retry_count = cmd.rnr_retry_count; param.max_cm_retries = cmd.max_cm_retries; param.srq = cmd.srq; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_req(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(param.private_data); kfree(param.primary_path); kfree(param.alternate_path); return result; } static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_rep_param param; struct ib_ucm_context *ctx; struct ib_ucm_rep cmd; int result; param.private_data = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); if (result) return result; param.qp_num = cmd.qpn; param.starting_psn = cmd.psn; param.private_data_len = cmd.len; param.responder_resources = cmd.responder_resources; param.initiator_depth = cmd.initiator_depth; param.failover_accepted = cmd.failover_accepted; param.flow_control = cmd.flow_control; param.rnr_retry_count = cmd.rnr_retry_count; param.srq = cmd.srq; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { ctx->uid = cmd.uid; result = ib_send_cm_rep(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); kfree(param.private_data); return result; } static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int (*func)(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len)) { struct ib_ucm_private_data cmd; struct ib_ucm_context *ctx; const void *private_data = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len); if (result) return result; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = func(ctx->cm_id, private_data, cmd.len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); kfree(private_data); return result; } static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu); } static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq); } static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep); } static ssize_t ib_ucm_send_info(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int (*func)(struct ib_cm_id *cm_id, int status, const void *info, u8 info_len, const void *data, u8 data_len)) { struct ib_ucm_context *ctx; struct ib_ucm_info cmd; const void *data = NULL; const void *info = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len); if (result) goto done; result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len); if (result) goto done; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = func(ctx->cm_id, cmd.status, info, cmd.info_len, data, cmd.data_len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(data); kfree(info); return result; } static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej); } static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr); } static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_context *ctx; struct ib_ucm_mra cmd; const void *data = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); if (result) return result; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); kfree(data); return result; } static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_context *ctx; struct ib_sa_path_rec *path = NULL; struct ib_ucm_lap cmd; const void *data = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); if (result) goto done; result = ib_ucm_path_get(&path, cmd.path); if (result) goto done; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(data); kfree(path); return result; } static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_sidr_req_param param; struct ib_ucm_context *ctx; struct ib_ucm_sidr_req cmd; int result; param.private_data = NULL; param.path = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); if (result) goto done; result = ib_ucm_path_get(¶m.path, cmd.path); if (result) goto done; param.private_data_len = cmd.len; param.service_id = cmd.sid; param.timeout_ms = cmd.timeout; param.max_cm_retries = cmd.max_cm_retries; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_sidr_req(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(param.private_data); kfree(param.path); return result; } static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_sidr_rep_param param; struct ib_ucm_sidr_rep cmd; struct ib_ucm_context *ctx; int result; param.info = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.data_len); if (result) goto done; result = ib_ucm_alloc_data(¶m.info, cmd.info, cmd.info_len); if (result) goto done; param.qp_num = cmd.qpn; param.qkey = cmd.qkey; param.status = cmd.status; param.info_length = cmd.info_len; param.private_data_len = cmd.data_len; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_sidr_rep(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(param.private_data); kfree(param.info); return result; } static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) = { [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id, [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id, [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id, [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen, [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify, [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req, [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep, [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu, [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq, [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep, [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej, [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra, [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap, [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr, [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req, [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep, [IB_USER_CM_CMD_EVENT] = ib_ucm_event, [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr, }; static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) { struct ib_ucm_file *file = filp->private_data; struct ib_ucm_cmd_hdr hdr; ssize_t result; if (WARN_ON_ONCE(!ib_safe_file_access(filp))) return -EACCES; if (len < sizeof(hdr)) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) return -EINVAL; result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); if (!result) result = len; return result; } static unsigned int ib_ucm_poll(struct file *filp, struct poll_table_struct *wait) { struct ib_ucm_file *file = filp->private_data; unsigned int mask = 0; poll_wait(filp, &file->poll_wait, wait); if (!list_empty(&file->events)) mask = POLLIN | POLLRDNORM; return mask; } /* * ib_ucm_open() does not need the BKL: * * - no global state is referred to; * - there is no ioctl method to race against; * - no further module initialization is required for open to work * after the device is registered. */ static int ib_ucm_open(struct inode *inode, struct file *filp) { struct ib_ucm_file *file; file = kmalloc(sizeof(*file), GFP_KERNEL); if (!file) return -ENOMEM; INIT_LIST_HEAD(&file->events); INIT_LIST_HEAD(&file->ctxs); init_waitqueue_head(&file->poll_wait); mutex_init(&file->file_mutex); filp->private_data = file; file->filp = filp; file->device = container_of(inode->i_cdev->si_drv1, struct ib_ucm_device, cdev); return nonseekable_open(inode, filp); } static int ib_ucm_close(struct inode *inode, struct file *filp) { struct ib_ucm_file *file = filp->private_data; struct ib_ucm_context *ctx; mutex_lock(&file->file_mutex); while (!list_empty(&file->ctxs)) { ctx = list_entry(file->ctxs.next, struct ib_ucm_context, file_list); mutex_unlock(&file->file_mutex); mutex_lock(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); mutex_unlock(&ctx_id_mutex); ib_destroy_cm_id(ctx->cm_id); ib_ucm_cleanup_events(ctx); kfree(ctx); mutex_lock(&file->file_mutex); } mutex_unlock(&file->file_mutex); kfree(file); return 0; } static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); static void ib_ucm_release_dev(struct device *dev) { struct ib_ucm_device *ucm_dev; ucm_dev = container_of(dev, struct ib_ucm_device, dev); cdev_del(&ucm_dev->cdev); if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) clear_bit(ucm_dev->devnum, dev_map); else clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map); kfree(ucm_dev); } static const struct file_operations ucm_fops = { .owner = THIS_MODULE, .open = ib_ucm_open, .release = ib_ucm_close, .write = ib_ucm_write, .poll = ib_ucm_poll, .llseek = no_llseek, }; static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_ucm_device *ucm_dev; ucm_dev = container_of(dev, struct ib_ucm_device, dev); return sprintf(buf, "%s\n", ucm_dev->ib_dev->name); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static dev_t overflow_maj; static int find_overflow_devnum(void) { int ret; if (!overflow_maj) { ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES, "infiniband_cm"); if (ret) { pr_err("ucm: couldn't register dynamic device number\n"); return ret; } } ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES); if (ret >= IB_UCM_MAX_DEVICES) return -1; return ret; } static void ib_ucm_add_one(struct ib_device *device) { int devnum; dev_t base; struct ib_ucm_device *ucm_dev; if (!device->alloc_ucontext || !rdma_cap_ib_cm(device, 1)) return; ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); if (!ucm_dev) return; ucm_dev->ib_dev = device; devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); if (devnum >= IB_UCM_MAX_DEVICES) { devnum = find_overflow_devnum(); if (devnum < 0) goto err; ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES; base = devnum + overflow_maj; set_bit(devnum, overflow_map); } else { ucm_dev->devnum = devnum; base = devnum + IB_UCM_BASE_DEV; set_bit(devnum, dev_map); } cdev_init(&ucm_dev->cdev, &ucm_fops); ucm_dev->cdev.owner = THIS_MODULE; kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); if (cdev_add(&ucm_dev->cdev, base, 1)) goto err; ucm_dev->dev.class = &cm_class; ucm_dev->dev.parent = device->dma_device; ucm_dev->dev.devt = ucm_dev->cdev.dev; ucm_dev->dev.release = ib_ucm_release_dev; dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum); if (device_register(&ucm_dev->dev)) goto err_cdev; if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev)) goto err_dev; ib_set_client_data(device, &ucm_client, ucm_dev); return; err_dev: device_unregister(&ucm_dev->dev); err_cdev: cdev_del(&ucm_dev->cdev); if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) clear_bit(devnum, dev_map); else clear_bit(devnum, overflow_map); err: kfree(ucm_dev); return; } static void ib_ucm_remove_one(struct ib_device *device, void *client_data) { struct ib_ucm_device *ucm_dev = client_data; if (!ucm_dev) return; device_unregister(&ucm_dev->dev); } static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_CM_ABI_VERSION)); static int __init ib_ucm_init(void) { int ret; ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, "infiniband_cm"); if (ret) { pr_err("ucm: couldn't register device number\n"); goto error1; } ret = class_create_file(&cm_class, &class_attr_abi_version.attr); if (ret) { pr_err("ucm: couldn't create abi_version attribute\n"); goto error2; } ret = ib_register_client(&ucm_client); if (ret) { pr_err("ucm: couldn't register client\n"); goto error3; } return 0; error3: class_remove_file(&cm_class, &class_attr_abi_version.attr); error2: unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); error1: return ret; } static void __exit ib_ucm_cleanup(void) { ib_unregister_client(&ucm_client); class_remove_file(&cm_class, &class_attr_abi_version.attr); unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); if (overflow_maj) unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES); idr_destroy(&ctx_id_table); } module_init_order(ib_ucm_init, SI_ORDER_THIRD); module_exit(ib_ucm_cleanup); Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucma.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucma.c (revision 323650) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_ucma.c (revision 323651) @@ -1,1755 +1,1754 @@ /* * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); MODULE_LICENSE("Dual BSD/GPL"); static unsigned int max_backlog = 1024; struct ucma_file { struct mutex mut; struct file *filp; struct list_head ctx_list; struct list_head event_list; wait_queue_head_t poll_wait; struct workqueue_struct *close_wq; }; struct ucma_context { int id; struct completion comp; atomic_t ref; int events_reported; int backlog; struct ucma_file *file; struct rdma_cm_id *cm_id; u64 uid; struct list_head list; struct list_head mc_list; /* mark that device is in process of destroying the internal HW * resources, protected by the global mut */ int closing; /* sync between removal event and id destroy, protected by file mut */ int destroying; struct work_struct close_work; }; struct ucma_multicast { struct ucma_context *ctx; int id; int events_reported; u64 uid; u8 join_state; struct list_head list; struct sockaddr_storage addr; }; struct ucma_event { struct ucma_context *ctx; struct ucma_multicast *mc; struct list_head list; struct rdma_cm_id *cm_id; struct rdma_ucm_event_resp resp; struct work_struct close_work; }; static DEFINE_MUTEX(mut); static DEFINE_IDR(ctx_idr); static DEFINE_IDR(multicast_idr); static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) { struct ucma_context *ctx; ctx = idr_find(&ctx_idr, id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); return ctx; } static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) { struct ucma_context *ctx; mutex_lock(&mut); ctx = _ucma_find_context(id, file); if (!IS_ERR(ctx)) { if (ctx->closing) ctx = ERR_PTR(-EIO); else atomic_inc(&ctx->ref); } mutex_unlock(&mut); return ctx; } static void ucma_put_ctx(struct ucma_context *ctx) { if (atomic_dec_and_test(&ctx->ref)) complete(&ctx->comp); } static void ucma_close_event_id(struct work_struct *work) { struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work); rdma_destroy_id(uevent_close->cm_id); kfree(uevent_close); } static void ucma_close_id(struct work_struct *work) { struct ucma_context *ctx = container_of(work, struct ucma_context, close_work); /* once all inflight tasks are finished, we close all underlying * resources. The context is still alive till its explicit destryoing * by its creator. */ ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the id. */ rdma_destroy_id(ctx->cm_id); } static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) { struct ucma_context *ctx; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return NULL; INIT_WORK(&ctx->close_work, ucma_close_id); atomic_set(&ctx->ref, 1); init_completion(&ctx->comp); INIT_LIST_HEAD(&ctx->mc_list); ctx->file = file; mutex_lock(&mut); ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL); mutex_unlock(&mut); if (ctx->id < 0) goto error; list_add_tail(&ctx->list, &file->ctx_list); return ctx; error: kfree(ctx); return NULL; } static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) { struct ucma_multicast *mc; mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return NULL; mutex_lock(&mut); mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL); mutex_unlock(&mut); if (mc->id < 0) goto error; mc->ctx = ctx; list_add_tail(&mc->list, &ctx->mc_list); return mc; error: kfree(mc); return NULL; } static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, struct rdma_conn_param *src) { if (src->private_data_len) memcpy(dst->private_data, src->private_data, src->private_data_len); dst->private_data_len = src->private_data_len; dst->responder_resources =src->responder_resources; dst->initiator_depth = src->initiator_depth; dst->flow_control = src->flow_control; dst->retry_count = src->retry_count; dst->rnr_retry_count = src->rnr_retry_count; dst->srq = src->srq; dst->qp_num = src->qp_num; } static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst, struct rdma_ud_param *src) { if (src->private_data_len) memcpy(dst->private_data, src->private_data, src->private_data_len); dst->private_data_len = src->private_data_len; ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); dst->qp_num = src->qp_num; dst->qkey = src->qkey; } static void ucma_set_event_context(struct ucma_context *ctx, struct rdma_cm_event *event, struct ucma_event *uevent) { uevent->ctx = ctx; switch (event->event) { case RDMA_CM_EVENT_MULTICAST_JOIN: case RDMA_CM_EVENT_MULTICAST_ERROR: uevent->mc = __DECONST(struct ucma_multicast *, event->param.ud.private_data); uevent->resp.uid = uevent->mc->uid; uevent->resp.id = uevent->mc->id; break; default: uevent->resp.uid = ctx->uid; uevent->resp.id = ctx->id; break; } } /* Called with file->mut locked for the relevant context. */ static void ucma_removal_event_handler(struct rdma_cm_id *cm_id) { struct ucma_context *ctx = cm_id->context; struct ucma_event *con_req_eve; int event_found = 0; if (ctx->destroying) return; /* only if context is pointing to cm_id that it owns it and can be * queued to be closed, otherwise that cm_id is an inflight one that * is part of that context event list pending to be detached and * reattached to its new context as part of ucma_get_event, * handled separately below. */ if (ctx->cm_id == cm_id) { mutex_lock(&mut); ctx->closing = 1; mutex_unlock(&mut); queue_work(ctx->file->close_wq, &ctx->close_work); return; } list_for_each_entry(con_req_eve, &ctx->file->event_list, list) { if (con_req_eve->cm_id == cm_id && con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { list_del(&con_req_eve->list); INIT_WORK(&con_req_eve->close_work, ucma_close_event_id); queue_work(ctx->file->close_wq, &con_req_eve->close_work); event_found = 1; break; } } if (!event_found) pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n"); } static int ucma_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { struct ucma_event *uevent; struct ucma_context *ctx = cm_id->context; int ret = 0; uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); if (!uevent) return event->event == RDMA_CM_EVENT_CONNECT_REQUEST; mutex_lock(&ctx->file->mut); uevent->cm_id = cm_id; ucma_set_event_context(ctx, event, uevent); uevent->resp.event = event->event; uevent->resp.status = event->status; if (cm_id->qp_type == IB_QPT_UD) ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, &event->param.conn); if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { if (!ctx->backlog) { ret = -ENOMEM; kfree(uevent); goto out; } ctx->backlog--; } else if (!ctx->uid || ctx->cm_id != cm_id) { /* * We ignore events for new connections until userspace has set * their context. This can only happen if an error occurs on a * new connection before the user accepts it. This is okay, * since the accept will just fail later. However, we do need * to release the underlying HW resources in case of a device * removal event. */ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) ucma_removal_event_handler(cm_id); kfree(uevent); goto out; } list_add_tail(&uevent->list, &ctx->file->event_list); wake_up_interruptible(&ctx->file->poll_wait); - linux_poll_wakeup(ctx->file->filp); if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) ucma_removal_event_handler(cm_id); out: mutex_unlock(&ctx->file->mut); return ret; } static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct ucma_context *ctx; struct rdma_ucm_get_event cmd; struct ucma_event *uevent; int ret = 0; if (out_len < sizeof uevent->resp) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&file->mut); while (list_empty(&file->event_list)) { mutex_unlock(&file->mut); if (file->filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, !list_empty(&file->event_list))) return -ERESTARTSYS; mutex_lock(&file->mut); } uevent = list_entry(file->event_list.next, struct ucma_event, list); if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { ctx = ucma_alloc_ctx(file); if (!ctx) { ret = -ENOMEM; goto done; } uevent->ctx->backlog++; ctx->cm_id = uevent->cm_id; ctx->cm_id->context = ctx; uevent->resp.id = ctx->id; } if (copy_to_user((void __user *)(unsigned long)cmd.response, &uevent->resp, sizeof uevent->resp)) { ret = -EFAULT; goto done; } list_del(&uevent->list); uevent->ctx->events_reported++; if (uevent->mc) uevent->mc->events_reported++; kfree(uevent); done: mutex_unlock(&file->mut); return ret; } static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type) { switch (cmd->ps) { case RDMA_PS_TCP: *qp_type = IB_QPT_RC; return 0; case RDMA_PS_UDP: case RDMA_PS_IPOIB: *qp_type = IB_QPT_UD; return 0; case RDMA_PS_IB: *qp_type = cmd->qp_type; return 0; default: return -EINVAL; } } static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_create_id cmd; struct rdma_ucm_create_id_resp resp; struct ucma_context *ctx; enum ib_qp_type qp_type; int ret; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ret = ucma_get_qp_type(&cmd, &qp_type); if (ret) return ret; mutex_lock(&file->mut); ctx = ucma_alloc_ctx(file); mutex_unlock(&file->mut); if (!ctx) return -ENOMEM; ctx->uid = cmd.uid; ctx->cm_id = rdma_create_id(TD_TO_VNET(curthread), ucma_event_handler, ctx, cmd.ps, qp_type); if (IS_ERR(ctx->cm_id)) { ret = PTR_ERR(ctx->cm_id); goto err1; } resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { ret = -EFAULT; goto err2; } return 0; err2: rdma_destroy_id(ctx->cm_id); err1: mutex_lock(&mut); idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); kfree(ctx); return ret; } static void ucma_cleanup_multicast(struct ucma_context *ctx) { struct ucma_multicast *mc, *tmp; mutex_lock(&mut); list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { list_del(&mc->list); idr_remove(&multicast_idr, mc->id); kfree(mc); } mutex_unlock(&mut); } static void ucma_cleanup_mc_events(struct ucma_multicast *mc) { struct ucma_event *uevent, *tmp; list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) { if (uevent->mc != mc) continue; list_del(&uevent->list); kfree(uevent); } } /* * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At * this point, no new events will be reported from the hardware. However, we * still need to cleanup the UCMA context for this ID. Specifically, there * might be events that have not yet been consumed by the user space software. * These might include pending connect requests which we have not completed * processing. We cannot call rdma_destroy_id while holding the lock of the * context (file->mut), as it might cause a deadlock. We therefore extract all * relevant events from the context pending events list while holding the * mutex. After that we release them as needed. */ static int ucma_free_ctx(struct ucma_context *ctx) { int events_reported; struct ucma_event *uevent, *tmp; LIST_HEAD(list); ucma_cleanup_multicast(ctx); /* Cleanup events not yet reported to the user. */ mutex_lock(&ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { if (uevent->ctx == ctx) list_move_tail(&uevent->list, &list); } list_del(&ctx->list); mutex_unlock(&ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &list, list) { list_del(&uevent->list); if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) rdma_destroy_id(uevent->cm_id); kfree(uevent); } events_reported = ctx->events_reported; kfree(ctx); return events_reported; } static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_destroy_id cmd; struct rdma_ucm_destroy_id_resp resp; struct ucma_context *ctx; int ret = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&mut); ctx = _ucma_find_context(cmd.id, file); if (!IS_ERR(ctx)) idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->file->mut); ctx->destroying = 1; mutex_unlock(&ctx->file->mut); flush_workqueue(ctx->file->close_wq); /* At this point it's guaranteed that there is no inflight * closing task */ mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); rdma_destroy_id(ctx->cm_id); } else { mutex_unlock(&mut); } resp.events_reported = ucma_free_ctx(ctx); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; return ret; } static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_bind_ip cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_bind cmd; struct sockaddr *addr; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; addr = (struct sockaddr *) &cmd.addr; if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr))) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_bind_addr(ctx->cm_id, addr); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_resolve_ip(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_resolve_ip cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_resolve_addr(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_resolve_addr cmd; struct sockaddr *src, *dst; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; src = (struct sockaddr *) &cmd.src_addr; dst = (struct sockaddr *) &cmd.dst_addr; if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) || !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst))) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_resolve_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_resolve_route cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); ucma_put_ctx(ctx); return ret; } static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { struct rdma_dev_addr *dev_addr; resp->num_paths = route->num_paths; switch (route->num_paths) { case 0: dev_addr = &route->addr.dev_addr; rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); break; case 2: ib_copy_path_rec_to_user(&resp->ib_route[1], &route->path_rec[1]); /* fall through */ case 1: ib_copy_path_rec_to_user(&resp->ib_route[0], &route->path_rec[0]); break; default: break; } } static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { resp->num_paths = route->num_paths; switch (route->num_paths) { case 0: rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, (union ib_gid *)&resp->ib_route[0].dgid); rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, (union ib_gid *)&resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(0xffff); break; case 2: ib_copy_path_rec_to_user(&resp->ib_route[1], &route->path_rec[1]); /* fall through */ case 1: ib_copy_path_rec_to_user(&resp->ib_route[0], &route->path_rec[0]); break; default: break; } } static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { struct rdma_dev_addr *dev_addr; dev_addr = &route->addr.dev_addr; rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); } static ssize_t ucma_query_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_query cmd; struct rdma_ucm_query_route_resp resp; struct ucma_context *ctx; struct sockaddr *addr; int ret = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); memset(&resp, 0, sizeof resp); addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); if (!ctx->cm_id->device) goto out; resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; resp.port_num = ctx->cm_id->port_num; if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_ib_route(&resp, &ctx->cm_id->route); else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_iboe_route(&resp, &ctx->cm_id->route); else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_iw_route(&resp, &ctx->cm_id->route); out: if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; ucma_put_ctx(ctx); return ret; } static void ucma_query_device_addr(struct rdma_cm_id *cm_id, struct rdma_ucm_query_addr_resp *resp) { if (!cm_id->device) return; resp->node_guid = (__force __u64) cm_id->device->node_guid; resp->port_num = cm_id->port_num; resp->pkey = (__force __u16) cpu_to_be16( ib_addr_get_pkey(&cm_id->route.addr.dev_addr)); } static ssize_t ucma_query_addr(struct ucma_context *ctx, void __user *response, int out_len) { struct rdma_ucm_query_addr_resp resp; struct sockaddr *addr; int ret = 0; if (out_len < sizeof(resp)) return -ENOSPC; memset(&resp, 0, sizeof resp); addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; resp.src_size = rdma_addr_size(addr); memcpy(&resp.src_addr, addr, resp.src_size); addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; resp.dst_size = rdma_addr_size(addr); memcpy(&resp.dst_addr, addr, resp.dst_size); ucma_query_device_addr(ctx->cm_id, &resp); if (copy_to_user(response, &resp, sizeof(resp))) ret = -EFAULT; return ret; } static ssize_t ucma_query_path(struct ucma_context *ctx, void __user *response, int out_len) { struct rdma_ucm_query_path_resp *resp; int i, ret = 0; if (out_len < sizeof(*resp)) return -ENOSPC; resp = kzalloc(out_len, GFP_KERNEL); if (!resp) return -ENOMEM; resp->num_paths = ctx->cm_id->route.num_paths; for (i = 0, out_len -= sizeof(*resp); i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); i++, out_len -= sizeof(struct ib_path_rec_data)) { resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | IB_PATH_BIDIRECTIONAL; ib_sa_pack_path(&ctx->cm_id->route.path_rec[i], &resp->path_data[i].path_rec); } if (copy_to_user(response, resp, sizeof(*resp) + (i * sizeof(struct ib_path_rec_data)))) ret = -EFAULT; kfree(resp); return ret; } static ssize_t ucma_query_gid(struct ucma_context *ctx, void __user *response, int out_len) { struct rdma_ucm_query_addr_resp resp; struct sockaddr_ib *addr; int ret = 0; if (out_len < sizeof(resp)) return -ENOSPC; memset(&resp, 0, sizeof resp); ucma_query_device_addr(ctx->cm_id, &resp); addr = (struct sockaddr_ib *) &resp.src_addr; resp.src_size = sizeof(*addr); if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) { memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size); } else { addr->sib_family = AF_IB; addr->sib_pkey = (__force __be16) resp.pkey; rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr, (union ib_gid *) &addr->sib_addr); addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) &ctx->cm_id->route.addr.src_addr); } addr = (struct sockaddr_ib *) &resp.dst_addr; resp.dst_size = sizeof(*addr); if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) { memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size); } else { addr->sib_family = AF_IB; addr->sib_pkey = (__force __be16) resp.pkey; rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr, (union ib_gid *) &addr->sib_addr); addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr); } if (copy_to_user(response, &resp, sizeof(resp))) ret = -EFAULT; return ret; } static ssize_t ucma_query(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_query cmd; struct ucma_context *ctx; void __user *response; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; response = (void __user *)(unsigned long) cmd.response; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); switch (cmd.option) { case RDMA_USER_CM_QUERY_ADDR: ret = ucma_query_addr(ctx, response, out_len); break; case RDMA_USER_CM_QUERY_PATH: ret = ucma_query_path(ctx, response, out_len); break; case RDMA_USER_CM_QUERY_GID: ret = ucma_query_gid(ctx, response, out_len); break; default: ret = -ENOSYS; break; } ucma_put_ctx(ctx); return ret; } static void ucma_copy_conn_param(struct rdma_cm_id *id, struct rdma_conn_param *dst, struct rdma_ucm_conn_param *src) { dst->private_data = src->private_data; dst->private_data_len = src->private_data_len; dst->responder_resources =src->responder_resources; dst->initiator_depth = src->initiator_depth; dst->flow_control = src->flow_control; dst->retry_count = src->retry_count; dst->rnr_retry_count = src->rnr_retry_count; dst->srq = src->srq; dst->qp_num = src->qp_num; dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0; } static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_connect cmd; struct rdma_conn_param conn_param; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; if (!cmd.conn_param.valid) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); ret = rdma_connect(ctx->cm_id, &conn_param); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_listen cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ? cmd.backlog : max_backlog; ret = rdma_listen(ctx->cm_id, ctx->backlog); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_accept cmd; struct rdma_conn_param conn_param; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); if (cmd.conn_param.valid) { ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&file->mut); ret = rdma_accept(ctx->cm_id, &conn_param); if (!ret) ctx->uid = cmd.uid; mutex_unlock(&file->mut); } else ret = rdma_accept(ctx->cm_id, NULL); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_reject cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_disconnect cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_disconnect(ctx->cm_id); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_init_qp_attr(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_init_qp_attr cmd; struct ib_uverbs_qp_attr resp; struct ucma_context *ctx; struct ib_qp_attr qp_attr; int ret; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); if (ret) goto out; ib_copy_qp_attr_to_user(&resp, &qp_attr); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; out: ucma_put_ctx(ctx); return ret; } static int ucma_set_option_id(struct ucma_context *ctx, int optname, void *optval, size_t optlen) { int ret = 0; switch (optname) { case RDMA_OPTION_ID_TOS: if (optlen != sizeof(u8)) { ret = -EINVAL; break; } rdma_set_service_type(ctx->cm_id, *((u8 *) optval)); break; case RDMA_OPTION_ID_REUSEADDR: if (optlen != sizeof(int)) { ret = -EINVAL; break; } ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); break; case RDMA_OPTION_ID_AFONLY: if (optlen != sizeof(int)) { ret = -EINVAL; break; } ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0); break; default: ret = -ENOSYS; } return ret; } static int ucma_set_ib_path(struct ucma_context *ctx, struct ib_path_rec_data *path_data, size_t optlen) { struct ib_sa_path_rec sa_path; struct rdma_cm_event event; int ret; if (optlen % sizeof(*path_data)) return -EINVAL; for (; optlen; optlen -= sizeof(*path_data), path_data++) { if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY | IB_PATH_BIDIRECTIONAL)) break; } if (!optlen) return -EINVAL; memset(&sa_path, 0, sizeof(sa_path)); ib_sa_unpack_path(path_data->path_rec, &sa_path); ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); if (ret) return ret; memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; return ucma_event_handler(ctx->cm_id, &event); } static int ucma_set_option_ib(struct ucma_context *ctx, int optname, void *optval, size_t optlen) { int ret; switch (optname) { case RDMA_OPTION_IB_PATH: ret = ucma_set_ib_path(ctx, optval, optlen); break; default: ret = -ENOSYS; } return ret; } static int ucma_set_option_level(struct ucma_context *ctx, int level, int optname, void *optval, size_t optlen) { int ret; switch (level) { case RDMA_OPTION_ID: ret = ucma_set_option_id(ctx, optname, optval, optlen); break; case RDMA_OPTION_IB: ret = ucma_set_option_ib(ctx, optname, optval, optlen); break; default: ret = -ENOSYS; } return ret; } static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_set_option cmd; struct ucma_context *ctx; void *optval; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); optval = memdup_user((void __user *) (unsigned long) cmd.optval, cmd.optlen); if (IS_ERR(optval)) { ret = PTR_ERR(optval); goto out; } ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, cmd.optlen); kfree(optval); out: ucma_put_ctx(ctx); return ret; } static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_notify cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_process_join(struct ucma_file *file, struct rdma_ucm_join_mcast *cmd, int out_len) { struct rdma_ucm_create_id_resp resp; struct ucma_context *ctx; struct ucma_multicast *mc; struct sockaddr *addr; int ret; u8 join_state; if (out_len < sizeof(resp)) return -ENOSPC; addr = (struct sockaddr *) &cmd->addr; if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) return -EINVAL; if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) join_state = BIT(FULLMEMBER_JOIN); else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) join_state = BIT(SENDONLY_FULLMEMBER_JOIN); else return -EINVAL; ctx = ucma_get_ctx(file, cmd->id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&file->mut); mc = ucma_alloc_multicast(ctx); if (!mc) { ret = -ENOMEM; goto err1; } mc->join_state = join_state; mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, join_state, mc); if (ret) goto err2; resp.id = mc->id; if (copy_to_user((void __user *)(unsigned long) cmd->response, &resp, sizeof(resp))) { ret = -EFAULT; goto err3; } mutex_unlock(&file->mut); ucma_put_ctx(ctx); return 0; err3: rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr); ucma_cleanup_mc_events(mc); err2: mutex_lock(&mut); idr_remove(&multicast_idr, mc->id); mutex_unlock(&mut); list_del(&mc->list); kfree(mc); err1: mutex_unlock(&file->mut); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_join_ip_multicast(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_join_ip_mcast cmd; struct rdma_ucm_join_mcast join_cmd; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; join_cmd.response = cmd.response; join_cmd.uid = cmd.uid; join_cmd.id = cmd.id; join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER; memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); return ucma_process_join(file, &join_cmd, out_len); } static ssize_t ucma_join_multicast(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_join_mcast cmd; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; return ucma_process_join(file, &cmd, out_len); } static ssize_t ucma_leave_multicast(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_destroy_id cmd; struct rdma_ucm_destroy_id_resp resp; struct ucma_multicast *mc; int ret = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&mut); mc = idr_find(&multicast_idr, cmd.id); if (!mc) mc = ERR_PTR(-ENOENT); else if (mc->ctx->file != file) mc = ERR_PTR(-EINVAL); else if (!atomic_inc_not_zero(&mc->ctx->ref)) mc = ERR_PTR(-ENXIO); else idr_remove(&multicast_idr, mc->id); mutex_unlock(&mut); if (IS_ERR(mc)) { ret = PTR_ERR(mc); goto out; } rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); mutex_lock(&mc->ctx->file->mut); ucma_cleanup_mc_events(mc); list_del(&mc->list); mutex_unlock(&mc->ctx->file->mut); ucma_put_ctx(mc->ctx); resp.events_reported = mc->events_reported; kfree(mc); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; out: return ret; } static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) { /* Acquire mutex's based on pointer comparison to prevent deadlock. */ if (file1 < file2) { mutex_lock(&file1->mut); mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING); } else { mutex_lock(&file2->mut); mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING); } } static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2) { if (file1 < file2) { mutex_unlock(&file2->mut); mutex_unlock(&file1->mut); } else { mutex_unlock(&file1->mut); mutex_unlock(&file2->mut); } } static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file) { struct ucma_event *uevent, *tmp; list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) if (uevent->ctx == ctx) list_move_tail(&uevent->list, &file->event_list); } static ssize_t ucma_migrate_id(struct ucma_file *new_file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_migrate_id cmd; struct rdma_ucm_migrate_resp resp; struct ucma_context *ctx; struct fd f; struct ucma_file *cur_file; int ret = 0; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; /* Get current fd to protect against it being closed */ f = fdget(cmd.fd); if (!f.file) return -ENOENT; /* Validate current fd and prevent destruction of id. */ ctx = ucma_get_ctx(f.file->private_data, cmd.id); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); goto file_put; } cur_file = ctx->file; if (cur_file == new_file) { resp.events_reported = ctx->events_reported; goto response; } /* * Migrate events between fd's, maintaining order, and avoiding new * events being added before existing events. */ ucma_lock_files(cur_file, new_file); mutex_lock(&mut); list_move_tail(&ctx->list, &new_file->ctx_list); ucma_move_events(ctx, new_file); ctx->file = new_file; resp.events_reported = ctx->events_reported; mutex_unlock(&mut); ucma_unlock_files(cur_file, new_file); response: if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; ucma_put_ctx(ctx); file_put: fdput(f); return ret; } static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) = { [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip, [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip, [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route, [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, [RDMA_USER_CM_CMD_REJECT] = ucma_reject, [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, [RDMA_USER_CM_CMD_GET_OPTION] = NULL, [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast, [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id, [RDMA_USER_CM_CMD_QUERY] = ucma_query, [RDMA_USER_CM_CMD_BIND] = ucma_bind, [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast }; static ssize_t ucma_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) { struct ucma_file *file = filp->private_data; struct rdma_ucm_cmd_hdr hdr; ssize_t ret; if (WARN_ON_ONCE(!ib_safe_file_access(filp))) return -EACCES; if (len < sizeof(hdr)) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) return -EINVAL; if (!ucma_cmd_table[hdr.cmd]) return -ENOSYS; ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); if (!ret) ret = len; return ret; } static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait) { struct ucma_file *file = filp->private_data; unsigned int mask = 0; poll_wait(filp, &file->poll_wait, wait); if (!list_empty(&file->event_list)) mask = POLLIN | POLLRDNORM; return mask; } /* * ucma_open() does not need the BKL: * * - no global state is referred to; * - there is no ioctl method to race against; * - no further module initialization is required for open to work * after the device is registered. */ static int ucma_open(struct inode *inode, struct file *filp) { struct ucma_file *file; file = kmalloc(sizeof *file, GFP_KERNEL); if (!file) return -ENOMEM; file->close_wq = alloc_ordered_workqueue("ucma_close_id", WQ_MEM_RECLAIM); if (!file->close_wq) { kfree(file); return -ENOMEM; } INIT_LIST_HEAD(&file->event_list); INIT_LIST_HEAD(&file->ctx_list); init_waitqueue_head(&file->poll_wait); mutex_init(&file->mut); filp->private_data = file; file->filp = filp; return nonseekable_open(inode, filp); } static int ucma_close(struct inode *inode, struct file *filp) { struct ucma_file *file = filp->private_data; struct ucma_context *ctx, *tmp; mutex_lock(&file->mut); list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) { ctx->destroying = 1; mutex_unlock(&file->mut); mutex_lock(&mut); idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); flush_workqueue(file->close_wq); /* At that step once ctx was marked as destroying and workqueue * was flushed we are safe from any inflights handlers that * might put other closing task. */ mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); /* rdma_destroy_id ensures that no event handlers are * inflight for that id before releasing it. */ rdma_destroy_id(ctx->cm_id); } else { mutex_unlock(&mut); } ucma_free_ctx(ctx); mutex_lock(&file->mut); } mutex_unlock(&file->mut); destroy_workqueue(file->close_wq); kfree(file); return 0; } static long ucma_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { case FIONBIO: case FIOASYNC: return (0); default: return (-ENOTTY); } } static const struct file_operations ucma_fops = { .owner = THIS_MODULE, .open = ucma_open, .release = ucma_close, .write = ucma_write, .unlocked_ioctl = ucma_ioctl, .poll = ucma_poll, .llseek = no_llseek, }; static struct miscdevice ucma_misc = { .minor = MISC_DYNAMIC_MINOR, .name = "rdma_cm", .nodename = "infiniband/rdma_cm", .mode = 0666, .fops = &ucma_fops, }; static ssize_t show_abi_version(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); } static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int __init ucma_init(void) { int ret; ret = misc_register(&ucma_misc); if (ret) return ret; ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); if (ret) { pr_err("rdma_ucm: couldn't create abi_version attr\n"); goto err1; } return 0; err1: misc_deregister(&ucma_misc); return ret; } static void __exit ucma_cleanup(void) { device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); idr_destroy(&ctx_idr); idr_destroy(&multicast_idr); } module_init(ucma_init); module_exit(ucma_cleanup); Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_user_mad.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_user_mad.c (revision 323650) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_user_mad.c (revision 323651) @@ -1,1404 +1,1401 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Cisco. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define pr_fmt(fmt) "user_mad: " fmt #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace MAD packet access"); MODULE_LICENSE("Dual BSD/GPL"); enum { IB_UMAD_MAX_PORTS = 64, IB_UMAD_MAX_AGENTS = 32, IB_UMAD_MAJOR = 231, IB_UMAD_MINOR_BASE = 0 }; /* * Our lifetime rules for these structs are the following: * device special file is opened, we take a reference on the * ib_umad_port's struct ib_umad_device. We drop these * references in the corresponding close(). * * In addition to references coming from open character devices, there * is one more reference to each ib_umad_device representing the * module's reference taken when allocating the ib_umad_device in * ib_umad_add_one(). * * When destroying an ib_umad_device, we drop the module's reference. */ struct ib_umad_port { struct cdev cdev; struct device *dev; struct cdev sm_cdev; struct device *sm_dev; struct semaphore sm_sem; struct mutex file_mutex; struct list_head file_list; struct ib_device *ib_dev; struct ib_umad_device *umad_dev; int dev_num; u8 port_num; }; struct ib_umad_device { struct kobject kobj; struct ib_umad_port port[0]; }; struct ib_umad_file { struct mutex mutex; struct ib_umad_port *port; - struct file *filp; struct list_head recv_list; struct list_head send_list; struct list_head port_list; spinlock_t send_lock; wait_queue_head_t recv_wait; struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; int agents_dead; u8 use_pkey_index; u8 already_used; }; struct ib_umad_packet { struct ib_mad_send_buf *msg; struct ib_mad_recv_wc *recv_wc; struct list_head list; int length; struct ib_user_mad mad; }; static struct class *umad_class; static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); static DEFINE_SPINLOCK(port_lock); static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device, void *client_data); static void ib_umad_release_dev(struct kobject *kobj) { struct ib_umad_device *dev = container_of(kobj, struct ib_umad_device, kobj); kfree(dev); } static struct kobj_type ib_umad_dev_ktype = { .release = ib_umad_release_dev, }; static int hdr_size(struct ib_umad_file *file) { return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : sizeof (struct ib_user_mad_hdr_old); } /* caller must hold file->mutex */ static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) { return file->agents_dead ? NULL : file->agent[id]; } static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent, struct ib_umad_packet *packet) { int ret = 1; mutex_lock(&file->mutex); for (packet->mad.hdr.id = 0; packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; packet->mad.hdr.id++) if (agent == __get_agent(file, packet->mad.hdr.id)) { list_add_tail(&packet->list, &file->recv_list); wake_up_interruptible(&file->recv_wait); - linux_poll_wakeup(file->filp); ret = 0; break; } mutex_unlock(&file->mutex); return ret; } static void dequeue_send(struct ib_umad_file *file, struct ib_umad_packet *packet) { spin_lock_irq(&file->send_lock); list_del(&packet->list); spin_unlock_irq(&file->send_lock); } static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet = send_wc->send_buf->context[0]; dequeue_send(file, packet); ib_destroy_ah(packet->msg->ah); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { packet->length = IB_MGMT_MAD_HDR; packet->mad.hdr.status = ETIMEDOUT; if (!queue_packet(file, agent, packet)) return; } kfree(packet); } static void recv_handler(struct ib_mad_agent *agent, struct ib_mad_send_buf *send_buf, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet; if (mad_recv_wc->wc->status != IB_WC_SUCCESS) goto err1; packet = kzalloc(sizeof *packet, GFP_KERNEL); if (!packet) goto err1; packet->length = mad_recv_wc->mad_len; packet->recv_wc = mad_recv_wc; packet->mad.hdr.status = 0; packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); packet->mad.hdr.sl = mad_recv_wc->wc->sl; packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); if (packet->mad.hdr.grh_present) { struct ib_ah_attr ah_attr; ib_init_ah_from_wc(agent->device, agent->port_num, mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, &ah_attr); packet->mad.hdr.gid_index = ah_attr.grh.sgid_index; packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit; packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class; memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16); packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label); } if (queue_packet(file, agent, packet)) goto err2; return; err2: kfree(packet); err1: ib_free_recv_mad(mad_recv_wc); } static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, struct ib_umad_packet *packet, size_t count) { struct ib_mad_recv_buf *recv_buf; int left, seg_payload, offset, max_seg_payload; size_t seg_size; recv_buf = &packet->recv_wc->recv_buf; seg_size = packet->recv_wc->mad_seg_size; /* We need enough room to copy the first (or only) MAD segment. */ if ((packet->length <= seg_size && count < hdr_size(file) + packet->length) || (packet->length > seg_size && count < hdr_size(file) + seg_size)) return -EINVAL; if (copy_to_user(buf, &packet->mad, hdr_size(file))) return -EFAULT; buf += hdr_size(file); seg_payload = min_t(int, packet->length, seg_size); if (copy_to_user(buf, recv_buf->mad, seg_payload)) return -EFAULT; if (seg_payload < packet->length) { /* * Multipacket RMPP MAD message. Copy remainder of message. * Note that last segment may have a shorter payload. */ if (count < hdr_size(file) + packet->length) { /* * The buffer is too small, return the first RMPP segment, * which includes the RMPP message length. */ return -ENOSPC; } offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class); max_seg_payload = seg_size - offset; for (left = packet->length - seg_payload, buf += seg_payload; left; left -= seg_payload, buf += seg_payload) { recv_buf = container_of(recv_buf->list.next, struct ib_mad_recv_buf, list); seg_payload = min(left, max_seg_payload); if (copy_to_user(buf, (char *)recv_buf->mad + offset, seg_payload)) return -EFAULT; } } return hdr_size(file) + packet->length; } static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf, struct ib_umad_packet *packet, size_t count) { ssize_t size = hdr_size(file) + packet->length; if (count < size) return -EINVAL; if (copy_to_user(buf, &packet->mad, hdr_size(file))) return -EFAULT; buf += hdr_size(file); if (copy_to_user(buf, packet->mad.data, packet->length)) return -EFAULT; return size; } static ssize_t ib_umad_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_umad_file *file = filp->private_data; struct ib_umad_packet *packet; ssize_t ret; if (count < hdr_size(file)) return -EINVAL; mutex_lock(&file->mutex); while (list_empty(&file->recv_list)) { mutex_unlock(&file->mutex); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->recv_wait, !list_empty(&file->recv_list))) return -ERESTARTSYS; mutex_lock(&file->mutex); } packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); list_del(&packet->list); mutex_unlock(&file->mutex); if (packet->recv_wc) ret = copy_recv_mad(file, buf, packet, count); else ret = copy_send_mad(file, buf, packet, count); if (ret < 0) { /* Requeue packet */ mutex_lock(&file->mutex); list_add(&packet->list, &file->recv_list); mutex_unlock(&file->mutex); } else { if (packet->recv_wc) ib_free_recv_mad(packet->recv_wc); kfree(packet); } return ret; } static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf) { int left, seg; /* Copy class specific header */ if ((msg->hdr_len > IB_MGMT_RMPP_HDR) && copy_from_user((char *)msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR, msg->hdr_len - IB_MGMT_RMPP_HDR)) return -EFAULT; /* All headers are in place. Copy data segments. */ for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0; seg++, left -= msg->seg_size, buf += msg->seg_size) { if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf, min(left, msg->seg_size))) return -EFAULT; } return 0; } static int same_destination(struct ib_user_mad_hdr *hdr1, struct ib_user_mad_hdr *hdr2) { if (!hdr1->grh_present && !hdr2->grh_present) return (hdr1->lid == hdr2->lid); if (hdr1->grh_present && hdr2->grh_present) return !memcmp(hdr1->gid, hdr2->gid, 16); return 0; } static int is_duplicate(struct ib_umad_file *file, struct ib_umad_packet *packet) { struct ib_umad_packet *sent_packet; struct ib_mad_hdr *sent_hdr, *hdr; hdr = (struct ib_mad_hdr *) packet->mad.data; list_for_each_entry(sent_packet, &file->send_list, list) { sent_hdr = (struct ib_mad_hdr *) sent_packet->mad.data; if ((hdr->tid != sent_hdr->tid) || (hdr->mgmt_class != sent_hdr->mgmt_class)) continue; /* * No need to be overly clever here. If two new operations have * the same TID, reject the second as a duplicate. This is more * restrictive than required by the spec. */ if (!ib_response_mad(hdr)) { if (!ib_response_mad(sent_hdr)) return 1; continue; } else if (!ib_response_mad(sent_hdr)) continue; if (same_destination(&packet->mad.hdr, &sent_packet->mad.hdr)) return 1; } return 0; } static ssize_t ib_umad_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_umad_file *file = filp->private_data; struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct ib_ah_attr ah_attr; struct ib_ah *ah; struct ib_rmpp_mad *rmpp_mad; __be64 *tid; int ret, data_len, hdr_len, copy_offset, rmpp_active; u8 base_version; if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) return -EINVAL; packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; if (copy_from_user(&packet->mad, buf, hdr_size(file))) { ret = -EFAULT; goto err; } if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { ret = -EINVAL; goto err; } buf += hdr_size(file); if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) { ret = -EFAULT; goto err; } mutex_lock(&file->mutex); agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { ret = -EINVAL; goto err_up; } memset(&ah_attr, 0, sizeof ah_attr); ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid); ah_attr.sl = packet->mad.hdr.sl; ah_attr.src_path_bits = packet->mad.hdr.path_bits; ah_attr.port_num = file->port->port_num; if (packet->mad.hdr.grh_present) { ah_attr.ah_flags = IB_AH_GRH; memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); ah_attr.grh.sgid_index = packet->mad.hdr.gid_index; ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } ah = ib_create_ah(agent->qp->pd, &ah_attr); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_up; } rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) && ib_mad_kernel_rmpp_agent(agent)) { copy_offset = IB_MGMT_RMPP_HDR; rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE; } else { copy_offset = IB_MGMT_MAD_HDR; rmpp_active = 0; } base_version = ((struct ib_mad_hdr *)&packet->mad.data)->base_version; data_len = count - hdr_size(file) - hdr_len; packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), packet->mad.hdr.pkey_index, rmpp_active, hdr_len, data_len, GFP_KERNEL, base_version); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; } packet->msg->ah = ah; packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; packet->msg->retries = packet->mad.hdr.retries; packet->msg->context[0] = packet; /* Copy MAD header. Any RMPP header is already in place. */ memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); if (!rmpp_active) { if (copy_from_user((char *)packet->msg->mad + copy_offset, buf + copy_offset, hdr_len + data_len - copy_offset)) { ret = -EFAULT; goto err_msg; } } else { ret = copy_rmpp_mad(packet->msg, buf); if (ret) goto err_msg; } /* * Set the high-order part of the transaction ID to make MADs from * different agents unique, and allow routing responses back to the * original requestor. */ if (!ib_response_mad(packet->msg->mad)) { tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); rmpp_mad->mad_hdr.tid = *tid; } if (!ib_mad_kernel_rmpp_agent(agent) && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { spin_lock_irq(&file->send_lock); list_add_tail(&packet->list, &file->send_list); spin_unlock_irq(&file->send_lock); } else { spin_lock_irq(&file->send_lock); ret = is_duplicate(file, packet); if (!ret) list_add_tail(&packet->list, &file->send_list); spin_unlock_irq(&file->send_lock); if (ret) { ret = -EINVAL; goto err_msg; } } ret = ib_post_send_mad(packet->msg, NULL); if (ret) goto err_send; mutex_unlock(&file->mutex); return count; err_send: dequeue_send(file, packet); err_msg: ib_free_send_mad(packet->msg); err_ah: ib_destroy_ah(ah); err_up: mutex_unlock(&file->mutex); err: kfree(packet); return ret; } static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wait) { struct ib_umad_file *file = filp->private_data; /* we will always be able to post a MAD send */ unsigned int mask = POLLOUT | POLLWRNORM; poll_wait(filp, &file->recv_wait, wait); if (!list_empty(&file->recv_list)) mask |= POLLIN | POLLRDNORM; return mask; } static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, int compat_method_mask) { struct ib_user_mad_reg_req ureq; struct ib_mad_reg_req req; struct ib_mad_agent *agent = NULL; int agent_id; int ret; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); if (!file->port->ib_dev) { dev_notice(file->port->dev, "ib_umad_reg_agent: invalid device\n"); ret = -EPIPE; goto out; } if (copy_from_user(&ureq, arg, sizeof ureq)) { ret = -EFAULT; goto out; } if (ureq.qpn != 0 && ureq.qpn != 1) { dev_notice(file->port->dev, "ib_umad_reg_agent: invalid QPN %d specified\n", ureq.qpn); ret = -EINVAL; goto out; } for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) if (!__get_agent(file, agent_id)) goto found; dev_notice(file->port->dev, "ib_umad_reg_agent: Max Agents (%u) reached\n", IB_UMAD_MAX_AGENTS); ret = -ENOMEM; goto out; found: if (ureq.mgmt_class) { memset(&req, 0, sizeof(req)); req.mgmt_class = ureq.mgmt_class; req.mgmt_class_version = ureq.mgmt_class_version; memcpy(req.oui, ureq.oui, sizeof req.oui); if (compat_method_mask) { u32 *umm = (u32 *) ureq.method_mask; int i; for (i = 0; i < BITS_TO_LONGS(IB_MGMT_MAX_METHODS); ++i) req.method_mask[i] = umm[i * 2] | ((u64) umm[i * 2 + 1] << 32); } else memcpy(req.method_mask, ureq.method_mask, sizeof req.method_mask); } agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, ureq.mgmt_class ? &req : NULL, ureq.rmpp_version, send_handler, recv_handler, file, 0); if (IS_ERR(agent)) { ret = PTR_ERR(agent); agent = NULL; goto out; } if (put_user(agent_id, (u32 __user *) ((char *)arg + offsetof(struct ib_user_mad_reg_req, id)))) { ret = -EFAULT; goto out; } if (!file->already_used) { file->already_used = 1; if (!file->use_pkey_index) { dev_warn(file->port->dev, "process %s did not enable P_Key index support.\n", current->comm); dev_warn(file->port->dev, " Documentation/infiniband/user_mad.txt has info on the new ABI.\n"); } } file->agent[agent_id] = agent; ret = 0; out: mutex_unlock(&file->mutex); if (ret && agent) ib_unregister_mad_agent(agent); mutex_unlock(&file->port->file_mutex); return ret; } static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) { struct ib_user_mad_reg_req2 ureq; struct ib_mad_reg_req req; struct ib_mad_agent *agent = NULL; int agent_id; int ret; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); if (!file->port->ib_dev) { dev_notice(file->port->dev, "ib_umad_reg_agent2: invalid device\n"); ret = -EPIPE; goto out; } if (copy_from_user(&ureq, arg, sizeof(ureq))) { ret = -EFAULT; goto out; } if (ureq.qpn != 0 && ureq.qpn != 1) { dev_notice(file->port->dev, "ib_umad_reg_agent2: invalid QPN %d specified\n", ureq.qpn); ret = -EINVAL; goto out; } if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) { const u32 flags = IB_USER_MAD_REG_FLAGS_CAP; dev_notice(file->port->dev, "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n", ureq.flags, IB_USER_MAD_REG_FLAGS_CAP); ret = -EINVAL; if (put_user(flags, (u32 __user *) ((char *)arg + offsetof(struct ib_user_mad_reg_req2, flags)))) ret = -EFAULT; goto out; } for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) if (!__get_agent(file, agent_id)) goto found; dev_notice(file->port->dev, "ib_umad_reg_agent2: Max Agents (%u) reached\n", IB_UMAD_MAX_AGENTS); ret = -ENOMEM; goto out; found: if (ureq.mgmt_class) { memset(&req, 0, sizeof(req)); req.mgmt_class = ureq.mgmt_class; req.mgmt_class_version = ureq.mgmt_class_version; if (ureq.oui & 0xff000000) { dev_notice(file->port->dev, "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n", ureq.oui); ret = -EINVAL; goto out; } req.oui[2] = ureq.oui & 0x0000ff; req.oui[1] = (ureq.oui & 0x00ff00) >> 8; req.oui[0] = (ureq.oui & 0xff0000) >> 16; memcpy(req.method_mask, ureq.method_mask, sizeof(req.method_mask)); } agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, ureq.mgmt_class ? &req : NULL, ureq.rmpp_version, send_handler, recv_handler, file, ureq.flags); if (IS_ERR(agent)) { ret = PTR_ERR(agent); agent = NULL; goto out; } if (put_user(agent_id, (u32 __user *)((char *)arg + offsetof(struct ib_user_mad_reg_req2, id)))) { ret = -EFAULT; goto out; } if (!file->already_used) { file->already_used = 1; file->use_pkey_index = 1; } file->agent[agent_id] = agent; ret = 0; out: mutex_unlock(&file->mutex); if (ret && agent) ib_unregister_mad_agent(agent); mutex_unlock(&file->port->file_mutex); return ret; } static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) { struct ib_mad_agent *agent = NULL; u32 id; int ret = 0; if (get_user(id, arg)) return -EFAULT; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; goto out; } agent = file->agent[id]; file->agent[id] = NULL; out: mutex_unlock(&file->mutex); if (agent) ib_unregister_mad_agent(agent); mutex_unlock(&file->port->file_mutex); return ret; } static long ib_umad_enable_pkey(struct ib_umad_file *file) { int ret = 0; mutex_lock(&file->mutex); if (file->already_used) ret = -EINVAL; else file->use_pkey_index = 1; mutex_unlock(&file->mutex); return ret; } static long ib_umad_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { case IB_USER_MAD_REGISTER_AGENT: return ib_umad_reg_agent(filp->private_data, (void __user *) arg, 0); case IB_USER_MAD_UNREGISTER_AGENT: return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg); case IB_USER_MAD_ENABLE_PKEY: return ib_umad_enable_pkey(filp->private_data); case IB_USER_MAD_REGISTER_AGENT2: return ib_umad_reg_agent2(filp->private_data, (void __user *) arg); default: return -ENOIOCTLCMD; } } #ifdef CONFIG_COMPAT static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { case IB_USER_MAD_REGISTER_AGENT: return ib_umad_reg_agent(filp->private_data, compat_ptr(arg), 1); case IB_USER_MAD_UNREGISTER_AGENT: return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg)); case IB_USER_MAD_ENABLE_PKEY: return ib_umad_enable_pkey(filp->private_data); case IB_USER_MAD_REGISTER_AGENT2: return ib_umad_reg_agent2(filp->private_data, compat_ptr(arg)); default: return -ENOIOCTLCMD; } } #endif /* * ib_umad_open() does not need the BKL: * * - the ib_umad_port structures are properly reference counted, and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - the ioctl method does not affect any global state outside of the * file structure being operated on; */ static int ib_umad_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_umad_file *file; int ret = -ENXIO; port = container_of(inode->i_cdev->si_drv1, struct ib_umad_port, cdev); mutex_lock(&port->file_mutex); if (!port->ib_dev) goto out; ret = -ENOMEM; file = kzalloc(sizeof *file, GFP_KERNEL); if (!file) goto out; mutex_init(&file->mutex); spin_lock_init(&file->send_lock); INIT_LIST_HEAD(&file->recv_list); INIT_LIST_HEAD(&file->send_list); init_waitqueue_head(&file->recv_wait); file->port = port; - file->filp = filp; filp->private_data = file; list_add_tail(&file->port_list, &port->file_list); ret = nonseekable_open(inode, filp); if (ret) { list_del(&file->port_list); kfree(file); goto out; } kobject_get(&port->umad_dev->kobj); out: mutex_unlock(&port->file_mutex); return ret; } static int ib_umad_close(struct inode *inode, struct file *filp) { struct ib_umad_file *file = filp->private_data; struct ib_umad_device *dev = file->port->umad_dev; struct ib_umad_packet *packet, *tmp; int already_dead; int i; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); already_dead = file->agents_dead; file->agents_dead = 1; list_for_each_entry_safe(packet, tmp, &file->recv_list, list) { if (packet->recv_wc) ib_free_recv_mad(packet->recv_wc); kfree(packet); } list_del(&file->port_list); mutex_unlock(&file->mutex); if (!already_dead) for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) if (file->agent[i]) ib_unregister_mad_agent(file->agent[i]); mutex_unlock(&file->port->file_mutex); kfree(file); kobject_put(&dev->kobj); return 0; } static const struct file_operations umad_fops = { .owner = THIS_MODULE, .read = ib_umad_read, .write = ib_umad_write, .poll = ib_umad_poll, .unlocked_ioctl = ib_umad_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ib_umad_compat_ioctl, #endif .open = ib_umad_open, .release = ib_umad_close, .llseek = no_llseek, }; static int ib_umad_sm_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_port_modify props = { .set_port_cap_mask = IB_PORT_SM }; int ret; port = container_of(inode->i_cdev->si_drv1, struct ib_umad_port, sm_cdev); if (filp->f_flags & O_NONBLOCK) { if (down_trylock(&port->sm_sem)) { ret = -EAGAIN; goto fail; } } else { if (down_interruptible(&port->sm_sem)) { ret = -ERESTARTSYS; goto fail; } } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); if (ret) goto err_up_sem; filp->private_data = port; ret = nonseekable_open(inode, filp); if (ret) goto err_clr_sm_cap; kobject_get(&port->umad_dev->kobj); return 0; err_clr_sm_cap: swap(props.set_port_cap_mask, props.clr_port_cap_mask); ib_modify_port(port->ib_dev, port->port_num, 0, &props); err_up_sem: up(&port->sm_sem); fail: return ret; } static int ib_umad_sm_close(struct inode *inode, struct file *filp) { struct ib_umad_port *port = filp->private_data; struct ib_port_modify props = { .clr_port_cap_mask = IB_PORT_SM }; int ret = 0; mutex_lock(&port->file_mutex); if (port->ib_dev) ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); mutex_unlock(&port->file_mutex); up(&port->sm_sem); kobject_put(&port->umad_dev->kobj); return ret; } static const struct file_operations umad_sm_fops = { .owner = THIS_MODULE, .open = ib_umad_sm_open, .release = ib_umad_sm_close, .llseek = no_llseek, }; static struct ib_client umad_client = { .name = "umad", .add = ib_umad_add_one, .remove = ib_umad_remove_one }; static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_umad_port *port = dev_get_drvdata(dev); if (!port) return -ENODEV; return sprintf(buf, "%s\n", port->ib_dev->name); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static ssize_t show_port(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_umad_port *port = dev_get_drvdata(dev); if (!port) return -ENODEV; return sprintf(buf, "%d\n", port->port_num); } static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_MAD_ABI_VERSION)); static dev_t overflow_maj; static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS); static int find_overflow_devnum(struct ib_device *device) { int ret; if (!overflow_maj) { ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { dev_err(&device->dev, "couldn't register dynamic device number\n"); return ret; } } ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS); if (ret >= IB_UMAD_MAX_PORTS) return -1; return ret; } static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_device *umad_dev, struct ib_umad_port *port) { int devnum; dev_t base; spin_lock(&port_lock); devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); if (devnum >= IB_UMAD_MAX_PORTS) { spin_unlock(&port_lock); devnum = find_overflow_devnum(device); if (devnum < 0) return -1; spin_lock(&port_lock); port->dev_num = devnum + IB_UMAD_MAX_PORTS; base = devnum + overflow_maj; set_bit(devnum, overflow_map); } else { port->dev_num = devnum; base = devnum + base_dev; set_bit(devnum, dev_map); } spin_unlock(&port_lock); port->ib_dev = device; port->port_num = port_num; sema_init(&port->sm_sem, 1); mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); cdev_init(&port->cdev, &umad_fops); port->cdev.owner = THIS_MODULE; port->cdev.kobj.parent = &umad_dev->kobj; kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); if (cdev_add(&port->cdev, base, 1)) goto err_cdev; port->dev = device_create(umad_class, device->dma_device, port->cdev.dev, port, "umad%d", port->dev_num); if (IS_ERR(port->dev)) goto err_cdev; if (device_create_file(port->dev, &dev_attr_ibdev)) goto err_dev; if (device_create_file(port->dev, &dev_attr_port)) goto err_dev; base += IB_UMAD_MAX_PORTS; cdev_init(&port->sm_cdev, &umad_sm_fops); port->sm_cdev.owner = THIS_MODULE; port->sm_cdev.kobj.parent = &umad_dev->kobj; kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); if (cdev_add(&port->sm_cdev, base, 1)) goto err_sm_cdev; port->sm_dev = device_create(umad_class, device->dma_device, port->sm_cdev.dev, port, "issm%d", port->dev_num); if (IS_ERR(port->sm_dev)) goto err_sm_cdev; if (device_create_file(port->sm_dev, &dev_attr_ibdev)) goto err_sm_dev; if (device_create_file(port->sm_dev, &dev_attr_port)) goto err_sm_dev; return 0; err_sm_dev: device_destroy(umad_class, port->sm_cdev.dev); err_sm_cdev: cdev_del(&port->sm_cdev); err_dev: device_destroy(umad_class, port->cdev.dev); err_cdev: cdev_del(&port->cdev); if (port->dev_num < IB_UMAD_MAX_PORTS) clear_bit(devnum, dev_map); else clear_bit(devnum, overflow_map); return -1; } static void ib_umad_kill_port(struct ib_umad_port *port) { struct ib_umad_file *file; int id; dev_set_drvdata(port->dev, NULL); dev_set_drvdata(port->sm_dev, NULL); device_destroy(umad_class, port->cdev.dev); device_destroy(umad_class, port->sm_cdev.dev); cdev_del(&port->cdev); cdev_del(&port->sm_cdev); mutex_lock(&port->file_mutex); port->ib_dev = NULL; list_for_each_entry(file, &port->file_list, port_list) { mutex_lock(&file->mutex); file->agents_dead = 1; mutex_unlock(&file->mutex); for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) if (file->agent[id]) ib_unregister_mad_agent(file->agent[id]); } mutex_unlock(&port->file_mutex); if (port->dev_num < IB_UMAD_MAX_PORTS) clear_bit(port->dev_num, dev_map); else clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map); } static void ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; int s, e, i; int count = 0; s = rdma_start_port(device); e = rdma_end_port(device); umad_dev = kzalloc(sizeof *umad_dev + (e - s + 1) * sizeof (struct ib_umad_port), GFP_KERNEL); if (!umad_dev) return; kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); for (i = s; i <= e; ++i) { if (!rdma_cap_ib_mad(device, i)) continue; umad_dev->port[i - s].umad_dev = umad_dev; if (ib_umad_init_port(device, i, umad_dev, &umad_dev->port[i - s])) goto err; count++; } if (!count) goto free; ib_set_client_data(device, &umad_client, umad_dev); return; err: while (--i >= s) { if (!rdma_cap_ib_mad(device, i)) continue; ib_umad_kill_port(&umad_dev->port[i - s]); } free: kobject_put(&umad_dev->kobj); } static void ib_umad_remove_one(struct ib_device *device, void *client_data) { struct ib_umad_device *umad_dev = client_data; int i; if (!umad_dev) return; for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) { if (rdma_cap_ib_mad(device, i + rdma_start_port(device))) ib_umad_kill_port(&umad_dev->port[i]); } kobject_put(&umad_dev->kobj); } static char *umad_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } static int __init ib_umad_init(void) { int ret; ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { pr_err("couldn't register device number\n"); goto out; } umad_class = class_create(THIS_MODULE, "infiniband_mad"); if (IS_ERR(umad_class)) { ret = PTR_ERR(umad_class); pr_err("couldn't create class infiniband_mad\n"); goto out_chrdev; } umad_class->devnode = umad_devnode; ret = class_create_file(umad_class, &class_attr_abi_version.attr); if (ret) { pr_err("couldn't create abi_version attribute\n"); goto out_class; } ret = ib_register_client(&umad_client); if (ret) { pr_err("couldn't register ib_umad client\n"); goto out_class; } return 0; out_class: class_destroy(umad_class); out_chrdev: unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); out: return ret; } static void __exit ib_umad_cleanup(void) { ib_unregister_client(&umad_client); class_destroy(umad_class); unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); if (overflow_maj) unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2); } module_init_order(ib_umad_init, SI_ORDER_THIRD); module_exit(ib_umad_cleanup); Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c (revision 323650) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c (revision 323651) @@ -1,1434 +1,1430 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "uverbs.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); MODULE_LICENSE("Dual BSD/GPL"); enum { IB_UVERBS_MAJOR = 231, IB_UVERBS_BASE_MINOR = 192, IB_UVERBS_MAX_DEVICES = 32 }; #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) static struct class *uverbs_class; DEFINE_SPINLOCK(ib_uverbs_idr_lock); DEFINE_IDR(ib_uverbs_pd_idr); DEFINE_IDR(ib_uverbs_mr_idr); DEFINE_IDR(ib_uverbs_mw_idr); DEFINE_IDR(ib_uverbs_ah_idr); DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); DEFINE_IDR(ib_uverbs_rule_idr); DEFINE_IDR(ib_uverbs_wq_idr); DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) = { [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr, [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, }; static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) = { [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, }; static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); int uverbs_dealloc_mw(struct ib_mw *mw) { struct ib_pd *pd = mw->pd; int ret; ret = mw->device->dealloc_mw(mw); if (!ret) atomic_dec(&pd->usecnt); return ret; } static void ib_uverbs_release_dev(struct kobject *kobj) { struct ib_uverbs_device *dev = container_of(kobj, struct ib_uverbs_device, kobj); cleanup_srcu_struct(&dev->disassociate_srcu); kfree(dev); } static struct kobj_type ib_uverbs_dev_ktype = { .release = ib_uverbs_release_dev, }; static void ib_uverbs_release_event_file(struct kref *ref) { struct ib_uverbs_event_file *file = container_of(ref, struct ib_uverbs_event_file, ref); kfree(file); } void ib_uverbs_release_ucq(struct ib_uverbs_file *file, struct ib_uverbs_event_file *ev_file, struct ib_ucq_object *uobj) { struct ib_uverbs_event *evt, *tmp; if (ev_file) { spin_lock_irq(&ev_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&ev_file->lock); kref_put(&ev_file->ref, ib_uverbs_release_event_file); } spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&file->async_file->lock); } void ib_uverbs_release_uevent(struct ib_uverbs_file *file, struct ib_uevent_object *uobj) { struct ib_uverbs_event *evt, *tmp; spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&file->async_file->lock); } static void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj) { struct ib_uverbs_mcast_entry *mcast, *tmp; list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) { ib_detach_mcast(qp, &mcast->gid, mcast->lid); list_del(&mcast->list); kfree(mcast); } } static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_ucontext *context) { struct ib_uobject *uobj, *tmp; context->closing = 1; list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { struct ib_ah *ah = uobj->object; idr_remove_uobj(&ib_uverbs_ah_idr, uobj); ib_destroy_ah(ah); kfree(uobj); } /* Remove MWs before QPs, in order to support type 2A MWs. */ list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { struct ib_mw *mw = uobj->object; idr_remove_uobj(&ib_uverbs_mw_idr, uobj); uverbs_dealloc_mw(mw); kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { struct ib_flow *flow_id = uobj->object; idr_remove_uobj(&ib_uverbs_rule_idr, uobj); ib_destroy_flow(flow_id); kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = uobj->object; struct ib_uqp_object *uqp = container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); if (qp == qp->real_qp) ib_uverbs_detach_umcast(qp, uqp); ib_destroy_qp(qp); ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) { struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); ib_destroy_rwq_ind_table(rwq_ind_tbl); kfree(ind_tbl); kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { struct ib_wq *wq = uobj->object; struct ib_uwq_object *uwq = container_of(uobj, struct ib_uwq_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_wq_idr, uobj); ib_destroy_wq(wq); ib_uverbs_release_uevent(file, &uwq->uevent); kfree(uwq); } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = uobj->object; struct ib_uevent_object *uevent = container_of(uobj, struct ib_uevent_object, uobject); idr_remove_uobj(&ib_uverbs_srq_idr, uobj); ib_destroy_srq(srq); ib_uverbs_release_uevent(file, uevent); kfree(uevent); } list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { struct ib_cq *cq = uobj->object; struct ib_uverbs_event_file *ev_file = cq->cq_context; struct ib_ucq_object *ucq = container_of(uobj, struct ib_ucq_object, uobject); idr_remove_uobj(&ib_uverbs_cq_idr, uobj); ib_destroy_cq(cq); ib_uverbs_release_ucq(file, ev_file, ucq); kfree(ucq); } list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { struct ib_mr *mr = uobj->object; idr_remove_uobj(&ib_uverbs_mr_idr, uobj); ib_dereg_mr(mr); kfree(uobj); } mutex_lock(&file->device->xrcd_tree_mutex); list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { struct ib_xrcd *xrcd = uobj->object; struct ib_uxrcd_object *uxrcd = container_of(uobj, struct ib_uxrcd_object, uobject); idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); ib_uverbs_dealloc_xrcd(file->device, xrcd); kfree(uxrcd); } mutex_unlock(&file->device->xrcd_tree_mutex); list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { struct ib_pd *pd = uobj->object; idr_remove_uobj(&ib_uverbs_pd_idr, uobj); ib_dealloc_pd(pd); kfree(uobj); } put_pid(context->tgid); return context->device->dealloc_ucontext(context); } static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) { complete(&dev->comp); } static void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); struct ib_device *ib_dev; int srcu_key; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); if (ib_dev && !ib_dev->disassociate_ucontext) module_put(ib_dev->owner); srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); if (atomic_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); kfree(file); } static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *event; int eventsz; int ret = 0; spin_lock_irq(&file->lock); while (list_empty(&file->event_list)) { spin_unlock_irq(&file->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, (!list_empty(&file->event_list) || /* The barriers built into wait_event_interruptible() * and wake_up() guarentee this will see the null set * without using RCU */ !file->uverbs_file->device->ib_dev))) return -ERESTARTSYS; /* If device was disassociated and no event exists set an error */ if (list_empty(&file->event_list) && !file->uverbs_file->device->ib_dev) return -EIO; spin_lock_irq(&file->lock); } event = list_entry(file->event_list.next, struct ib_uverbs_event, list); if (file->is_async) eventsz = sizeof (struct ib_uverbs_async_event_desc); else eventsz = sizeof (struct ib_uverbs_comp_event_desc); if (eventsz > count) { ret = -EINVAL; event = NULL; } else { list_del(file->event_list.next); if (event->counter) { ++(*event->counter); list_del(&event->obj_list); } } spin_unlock_irq(&file->lock); if (event) { if (copy_to_user(buf, event, eventsz)) ret = -EFAULT; else ret = eventsz; } kfree(event); return ret; } static unsigned int ib_uverbs_event_poll(struct file *filp, struct poll_table_struct *wait) { unsigned int pollflags = 0; struct ib_uverbs_event_file *file = filp->private_data; poll_wait(filp, &file->poll_wait, wait); spin_lock_irq(&file->lock); if (!list_empty(&file->event_list)) pollflags = POLLIN | POLLRDNORM; spin_unlock_irq(&file->lock); return pollflags; } static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) { struct ib_uverbs_event_file *file = filp->private_data; return fasync_helper(fd, filp, on, &file->async_queue); } static int ib_uverbs_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *entry, *tmp; int closed_already = 0; mutex_lock(&file->uverbs_file->device->lists_mutex); spin_lock_irq(&file->lock); closed_already = file->is_closed; file->is_closed = 1; list_for_each_entry_safe(entry, tmp, &file->event_list, list) { if (entry->counter) list_del(&entry->obj_list); kfree(entry); } spin_unlock_irq(&file->lock); if (!closed_already) { list_del(&file->list); if (file->is_async) ib_unregister_event_handler(&file->uverbs_file-> event_handler); } mutex_unlock(&file->uverbs_file->device->lists_mutex); kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); kref_put(&file->ref, ib_uverbs_release_event_file); return 0; } static const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, .fasync = ib_uverbs_event_fasync, .llseek = no_llseek, }; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { struct ib_uverbs_event_file *file = cq_context; struct ib_ucq_object *uobj; struct ib_uverbs_event *entry; unsigned long flags; if (!file) return; spin_lock_irqsave(&file->lock, flags); if (file->is_closed) { spin_unlock_irqrestore(&file->lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&file->lock, flags); return; } uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); entry->desc.comp.cq_handle = cq->uobject->user_handle; entry->counter = &uobj->comp_events_reported; list_add_tail(&entry->list, &file->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); spin_unlock_irqrestore(&file->lock, flags); wake_up_interruptible(&file->poll_wait); - linux_poll_wakeup(file->filp); kill_fasync(&file->async_queue, SIGIO, POLL_IN); } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, __u64 element, __u64 event, struct list_head *obj_list, u32 *counter) { struct ib_uverbs_event *entry; unsigned long flags; spin_lock_irqsave(&file->async_file->lock, flags); if (file->async_file->is_closed) { spin_unlock_irqrestore(&file->async_file->lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&file->async_file->lock, flags); return; } entry->desc.async.element = element; entry->desc.async.event_type = event; entry->desc.async.reserved = 0; entry->counter = counter; list_add_tail(&entry->list, &file->async_file->event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); spin_unlock_irqrestore(&file->async_file->lock, flags); wake_up_interruptible(&file->async_file->poll_wait); - linux_poll_wakeup(file->async_file->filp); kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; /* for XRC target qp's, check that qp is live */ if (!event->element.qp->uobject || !event->element.qp->uobject->live) return; uobj = container_of(event->element.qp->uobject, struct ib_uevent_object, uobject); ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, &uobj->events_reported); } void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, struct ib_uevent_object, uobject); ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, &uobj->events_reported); } void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; uobj = container_of(event->element.srq->uobject, struct ib_uevent_object, uobject); ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, &uobj->events_reported); } void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event) { struct ib_uverbs_file *file = container_of(handler, struct ib_uverbs_file, event_handler); ib_uverbs_async_handler(file, event->element.port_num, event->event, NULL, NULL); } void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) { kref_put(&file->async_file->ref, ib_uverbs_release_event_file); file->async_file = NULL; } struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, struct ib_device *ib_dev, int is_async) { struct ib_uverbs_event_file *ev_file; struct file *filp; int ret; ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL); if (!ev_file) return ERR_PTR(-ENOMEM); kref_init(&ev_file->ref); spin_lock_init(&ev_file->lock); INIT_LIST_HEAD(&ev_file->event_list); init_waitqueue_head(&ev_file->poll_wait); ev_file->uverbs_file = uverbs_file; kref_get(&ev_file->uverbs_file->ref); ev_file->async_queue = NULL; ev_file->is_closed = 0; /* * fops_get() can't fail here, because we're coming from a * system call on a uverbs file, which will already have a * module reference. */ filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops)); if (IS_ERR(filp)) goto err_put_refs; filp->private_data = ev_file; - ev_file->filp = filp; mutex_lock(&uverbs_file->device->lists_mutex); list_add_tail(&ev_file->list, &uverbs_file->device->uverbs_events_file_list); mutex_unlock(&uverbs_file->device->lists_mutex); if (is_async) { WARN_ON(uverbs_file->async_file); uverbs_file->async_file = ev_file; kref_get(&uverbs_file->async_file->ref); INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, ib_dev, ib_uverbs_event_handler); ret = ib_register_event_handler(&uverbs_file->event_handler); if (ret) goto err_put_file; /* At that point async file stuff was fully set */ ev_file->is_async = 1; } return filp; err_put_file: fput(filp); kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file); uverbs_file->async_file = NULL; return ERR_PTR(ret); err_put_refs: kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); kref_put(&ev_file->ref, ib_uverbs_release_event_file); return filp; } /* * Look up a completion event file by FD. If lookup is successful, * takes a ref to the event file struct that it returns; if * unsuccessful, returns NULL. */ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) { struct ib_uverbs_event_file *ev_file = NULL; struct fd f = fdget(fd); if (!f.file) return NULL; if (f.file->f_op != &uverbs_event_fops) goto out; ev_file = f.file->private_data; if (ev_file->is_async) { ev_file = NULL; goto out; } kref_get(&ev_file->ref); out: fdput(f); return ev_file; } static int verify_command_mask(struct ib_device *ib_dev, __u32 command) { u64 mask; if (command <= IB_USER_VERBS_CMD_OPEN_QP) mask = ib_dev->uverbs_cmd_mask; else mask = ib_dev->uverbs_ex_cmd_mask; if (mask & ((u64)1 << command)) return 0; return -1; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; struct ib_device *ib_dev; struct ib_uverbs_cmd_hdr hdr; __u32 command; __u32 flags; int srcu_key; ssize_t ret; if (WARN_ON_ONCE(!ib_safe_file_access(filp))) return -EACCES; if (count < sizeof hdr) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof hdr)) return -EFAULT; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); if (!ib_dev) { ret = -EIO; goto out; } if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | IB_USER_VERBS_CMD_COMMAND_MASK)) { ret = -EINVAL; goto out; } command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; if (verify_command_mask(ib_dev, command)) { ret = -EOPNOTSUPP; goto out; } if (!file->ucontext && command != IB_USER_VERBS_CMD_GET_CONTEXT) { ret = -EINVAL; goto out; } flags = (hdr.command & IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; if (!flags) { if (command >= ARRAY_SIZE(uverbs_cmd_table) || !uverbs_cmd_table[command]) { ret = -EINVAL; goto out; } if (hdr.in_words * 4 != count) { ret = -EINVAL; goto out; } ret = uverbs_cmd_table[command](file, ib_dev, buf + sizeof(hdr), hdr.in_words * 4, hdr.out_words * 4); } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { struct ib_uverbs_ex_cmd_hdr ex_hdr; struct ib_udata ucore; struct ib_udata uhw; size_t written_count = count; if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || !uverbs_ex_cmd_table[command]) { ret = -ENOSYS; goto out; } if (!file->ucontext) { ret = -EINVAL; goto out; } if (count < (sizeof(hdr) + sizeof(ex_hdr))) { ret = -EINVAL; goto out; } if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) { ret = -EFAULT; goto out; } count -= sizeof(hdr) + sizeof(ex_hdr); buf += sizeof(hdr) + sizeof(ex_hdr); if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) { ret = -EINVAL; goto out; } if (ex_hdr.cmd_hdr_reserved) { ret = -EINVAL; goto out; } if (ex_hdr.response) { if (!hdr.out_words && !ex_hdr.provider_out_words) { ret = -EINVAL; goto out; } if (!access_ok(VERIFY_WRITE, (void __user *) (unsigned long) ex_hdr.response, (hdr.out_words + ex_hdr.provider_out_words) * 8)) { ret = -EFAULT; goto out; } } else { if (hdr.out_words || ex_hdr.provider_out_words) { ret = -EINVAL; goto out; } } INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response, hdr.in_words * 8, hdr.out_words * 8); INIT_UDATA_BUF_OR_NULL(&uhw, buf + ucore.inlen, (unsigned long) ex_hdr.response + ucore.outlen, ex_hdr.provider_in_words * 8, ex_hdr.provider_out_words * 8); ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw); if (!ret) ret = written_count; } else { ret = -ENOSYS; } out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; struct ib_device *ib_dev; int ret = 0; int srcu_key; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); if (!ib_dev) { ret = -EIO; goto out; } if (!file->ucontext) ret = -ENODEV; else ret = ib_dev->mmap(file->ucontext, vma); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; } /* * ib_uverbs_open() does not need the BKL: * * - the ib_uverbs_device structures are properly reference counted and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - there is no ioctl method to race against; * - the open method will either immediately run -ENXIO, or all * required initialization will be done. */ static int ib_uverbs_open(struct inode *inode, struct file *filp) { struct ib_uverbs_device *dev; struct ib_uverbs_file *file; struct ib_device *ib_dev; int ret; int module_dependent; int srcu_key; dev = container_of(inode->i_cdev->si_drv1, struct ib_uverbs_device, cdev); if (!atomic_inc_not_zero(&dev->refcount)) return -ENXIO; srcu_key = srcu_read_lock(&dev->disassociate_srcu); mutex_lock(&dev->lists_mutex); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (!ib_dev) { ret = -EIO; goto err; } /* In case IB device supports disassociate ucontext, there is no hard * dependency between uverbs device and its low level device. */ module_dependent = !(ib_dev->disassociate_ucontext); if (module_dependent) { if (!try_module_get(ib_dev->owner)) { ret = -ENODEV; goto err; } } file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) { ret = -ENOMEM; if (module_dependent) goto err_module; goto err; } file->device = dev; file->ucontext = NULL; file->async_file = NULL; kref_init(&file->ref); mutex_init(&file->mutex); mutex_init(&file->cleanup_mutex); filp->private_data = file; kobject_get(&dev->kobj); list_add_tail(&file->list, &dev->uverbs_file_list); mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return nonseekable_open(inode, filp); err_module: module_put(ib_dev->owner); err: mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); if (atomic_dec_and_test(&dev->refcount)) ib_uverbs_comp_dev(dev); return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_device *dev = file->device; mutex_lock(&file->cleanup_mutex); if (file->ucontext) { ib_uverbs_cleanup_ucontext(file, file->ucontext); file->ucontext = NULL; } mutex_unlock(&file->cleanup_mutex); mutex_lock(&file->device->lists_mutex); if (!file->is_closed) { list_del(&file->list); file->is_closed = 1; } mutex_unlock(&file->device->lists_mutex); if (file->async_file) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); kobject_put(&dev->kobj); return 0; } static const struct file_operations uverbs_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, }; static const struct file_operations uverbs_mmap_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .mmap = ib_uverbs_mmap, .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, }; static struct ib_client uverbs_client = { .name = "uverbs", .add = ib_uverbs_add_one, .remove = ib_uverbs_remove_one }; static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, char *buf) { int ret = -ENODEV; int srcu_key; struct ib_uverbs_device *dev = dev_get_drvdata(device); struct ib_device *ib_dev; if (!dev) return -ENODEV; srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) ret = sprintf(buf, "%s\n", ib_dev->name); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static ssize_t show_dev_abi_version(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); int ret = -ENODEV; int srcu_key; struct ib_device *ib_dev; if (!dev) return -ENODEV; srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; } static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_VERBS_ABI_VERSION)); static dev_t overflow_maj; static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES); /* * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by * requesting a new major number and doubling the number of max devices we * support. It's stupid, but simple. */ static int find_overflow_devnum(void) { int ret; if (!overflow_maj) { ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { pr_err("user_verbs: couldn't register dynamic device number\n"); return ret; } } ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES); if (ret >= IB_UVERBS_MAX_DEVICES) return -1; return ret; } static ssize_t show_dev_device(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev || !dev->ib_dev->dma_device) return -ENODEV; return sprintf(buf, "0x%04x\n", ((struct pci_dev *)dev->ib_dev->dma_device)->device); } static DEVICE_ATTR(device, S_IRUGO, show_dev_device, NULL); static ssize_t show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev || !dev->ib_dev->dma_device) return -ENODEV; return sprintf(buf, "0x%04x\n", ((struct pci_dev *)dev->ib_dev->dma_device)->vendor); } static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL); struct attribute *device_attrs[] = { &dev_attr_device.attr, &dev_attr_vendor.attr, NULL }; static struct attribute_group device_group = { .name = "device", .attrs = device_attrs }; static void ib_uverbs_add_one(struct ib_device *device) { int devnum; dev_t base; struct ib_uverbs_device *uverbs_dev; int ret; if (!device->alloc_ucontext) return; uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL); if (!uverbs_dev) return; ret = init_srcu_struct(&uverbs_dev->disassociate_srcu); if (ret) { kfree(uverbs_dev); return; } atomic_set(&uverbs_dev->refcount, 1); init_completion(&uverbs_dev->comp); uverbs_dev->xrcd_tree = RB_ROOT; mutex_init(&uverbs_dev->xrcd_tree_mutex); kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype); mutex_init(&uverbs_dev->lists_mutex); INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); spin_lock(&map_lock); devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); if (devnum >= IB_UVERBS_MAX_DEVICES) { spin_unlock(&map_lock); devnum = find_overflow_devnum(); if (devnum < 0) goto err; spin_lock(&map_lock); uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES; base = devnum + overflow_maj; set_bit(devnum, overflow_map); } else { uverbs_dev->devnum = devnum; base = devnum + IB_UVERBS_BASE_DEV; set_bit(devnum, dev_map); } spin_unlock(&map_lock); rcu_assign_pointer(uverbs_dev->ib_dev, device); uverbs_dev->num_comp_vectors = device->num_comp_vectors; cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj; kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); if (cdev_add(&uverbs_dev->cdev, base, 1)) goto err_cdev; uverbs_dev->dev = device_create(uverbs_class, device->dma_device, uverbs_dev->cdev.dev, uverbs_dev, "uverbs%d", uverbs_dev->devnum); if (IS_ERR(uverbs_dev->dev)) goto err_cdev; if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) goto err_class; if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group)) goto err_class; ib_set_client_data(device, &uverbs_client, uverbs_dev); return; err_class: device_destroy(uverbs_class, uverbs_dev->cdev.dev); err_cdev: cdev_del(&uverbs_dev->cdev); if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) clear_bit(devnum, dev_map); else clear_bit(devnum, overflow_map); err: if (atomic_dec_and_test(&uverbs_dev->refcount)) ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); kobject_put(&uverbs_dev->kobj); return; } static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { struct ib_uverbs_file *file; struct ib_uverbs_event_file *event_file; struct ib_event event; /* Pending running commands to terminate */ synchronize_srcu(&uverbs_dev->disassociate_srcu); event.event = IB_EVENT_DEVICE_FATAL; event.element.port_num = 0; event.device = ib_dev; mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { struct ib_ucontext *ucontext; file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); file->is_closed = 1; list_del(&file->list); kref_get(&file->ref); mutex_unlock(&uverbs_dev->lists_mutex); ib_uverbs_event_handler(&file->event_handler, &event); mutex_lock(&file->cleanup_mutex); ucontext = file->ucontext; file->ucontext = NULL; mutex_unlock(&file->cleanup_mutex); /* At this point ib_uverbs_close cannot be running * ib_uverbs_cleanup_ucontext */ if (ucontext) { /* We must release the mutex before going ahead and * calling disassociate_ucontext. disassociate_ucontext * might end up indirectly calling uverbs_close, * for example due to freeing the resources * (e.g mmput). */ ib_dev->disassociate_ucontext(ucontext); ib_uverbs_cleanup_ucontext(file, ucontext); } mutex_lock(&uverbs_dev->lists_mutex); kref_put(&file->ref, ib_uverbs_release_file); } while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { event_file = list_first_entry(&uverbs_dev-> uverbs_events_file_list, struct ib_uverbs_event_file, list); spin_lock_irq(&event_file->lock); event_file->is_closed = 1; spin_unlock_irq(&event_file->lock); list_del(&event_file->list); if (event_file->is_async) { ib_unregister_event_handler(&event_file->uverbs_file-> event_handler); event_file->uverbs_file->event_handler.device = NULL; } wake_up_interruptible(&event_file->poll_wait); - linux_poll_wakeup(event_file->filp); kill_fasync(&event_file->async_queue, SIGIO, POLL_IN); } mutex_unlock(&uverbs_dev->lists_mutex); } static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) { struct ib_uverbs_device *uverbs_dev = client_data; int wait_clients = 1; if (!uverbs_dev) return; sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group); dev_set_drvdata(uverbs_dev->dev, NULL); device_destroy(uverbs_class, uverbs_dev->cdev.dev); cdev_del(&uverbs_dev->cdev); if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) clear_bit(uverbs_dev->devnum, dev_map); else clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); if (device->disassociate_ucontext) { /* We disassociate HW resources and immediately return. * Userspace will see a EIO errno for all future access. * Upon returning, ib_device may be freed internally and is not * valid any more. * uverbs_device is still available until all clients close * their files, then the uverbs device ref count will be zero * and its resources will be freed. * Note: At this point no more files can be opened since the * cdev was deleted, however active clients can still issue * commands and close their open files. */ rcu_assign_pointer(uverbs_dev->ib_dev, NULL); ib_uverbs_free_hw_resources(uverbs_dev, device); wait_clients = 0; } if (atomic_dec_and_test(&uverbs_dev->refcount)) ib_uverbs_comp_dev(uverbs_dev); if (wait_clients) wait_for_completion(&uverbs_dev->comp); kobject_put(&uverbs_dev->kobj); } static char *uverbs_devnode(struct device *dev, umode_t *mode) { if (mode) *mode = 0666; return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } static int __init ib_uverbs_init(void) { int ret; ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { pr_err("user_verbs: couldn't register device number\n"); goto out; } uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); if (IS_ERR(uverbs_class)) { ret = PTR_ERR(uverbs_class); pr_err("user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } uverbs_class->devnode = uverbs_devnode; ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); if (ret) { pr_err("user_verbs: couldn't create abi_version attribute\n"); goto out_class; } ret = ib_register_client(&uverbs_client); if (ret) { pr_err("user_verbs: couldn't register client\n"); goto out_class; } return 0; out_class: class_destroy(uverbs_class); out_chrdev: unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); out: return ret; } static void __exit ib_uverbs_cleanup(void) { ib_unregister_client(&uverbs_client); class_destroy(uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); if (overflow_maj) unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); idr_destroy(&ib_uverbs_pd_idr); idr_destroy(&ib_uverbs_mr_idr); idr_destroy(&ib_uverbs_mw_idr); idr_destroy(&ib_uverbs_ah_idr); idr_destroy(&ib_uverbs_cq_idr); idr_destroy(&ib_uverbs_qp_idr); idr_destroy(&ib_uverbs_srq_idr); } module_init_order(ib_uverbs_init, SI_ORDER_THIRD); module_exit(ib_uverbs_cleanup); Index: projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs.h =================================================================== --- projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs.h (revision 323650) +++ projects/bsd_rdma_4_9/sys/ofed/drivers/infiniband/core/uverbs.h (revision 323651) @@ -1,297 +1,296 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef UVERBS_H #define UVERBS_H #include #include #include #include #include #include #include #include #include #include #include #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ (udata)->inbuf = (const void __user *) (ibuf); \ (udata)->outbuf = (void __user *) (obuf); \ (udata)->inlen = (ilen); \ (udata)->outlen = (olen); \ } while (0) #define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen) \ do { \ (udata)->inbuf = (ilen) ? (const void __user *) (ibuf) : NULL; \ (udata)->outbuf = (olen) ? (void __user *) (obuf) : NULL; \ (udata)->inlen = (ilen); \ (udata)->outlen = (olen); \ } while (0) /* * Our lifetime rules for these structs are the following: * * struct ib_uverbs_device: One reference is held by the module and * released in ib_uverbs_remove_one(). Another reference is taken by * ib_uverbs_open() each time the character special file is opened, * and released in ib_uverbs_release_file() when the file is released. * * struct ib_uverbs_file: One reference is held by the VFS and * released when the file is closed. Another reference is taken when * an asynchronous event queue file is created and released when the * event file is closed. * * struct ib_uverbs_event_file: One reference is held by the VFS and * released when the file is closed. For asynchronous event files, * another reference is held by the corresponding main context file * and released when that file is closed. For completion event files, * a reference is taken when a CQ is created that uses the file, and * released when the CQ is destroyed. */ struct ib_uverbs_device { atomic_t refcount; int num_comp_vectors; struct completion comp; struct device *dev; struct ib_device __rcu *ib_dev; int devnum; struct cdev cdev; struct rb_root xrcd_tree; struct mutex xrcd_tree_mutex; struct kobject kobj; struct srcu_struct disassociate_srcu; struct mutex lists_mutex; /* protect lists */ struct list_head uverbs_file_list; struct list_head uverbs_events_file_list; }; struct ib_uverbs_event_file { struct kref ref; - struct file *filp; int is_async; struct ib_uverbs_file *uverbs_file; spinlock_t lock; int is_closed; wait_queue_head_t poll_wait; struct fasync_struct *async_queue; struct list_head event_list; struct list_head list; }; struct ib_uverbs_file { struct kref ref; struct mutex mutex; struct mutex cleanup_mutex; /* protect cleanup */ struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; struct ib_uverbs_event_file *async_file; struct list_head list; int is_closed; }; struct ib_uverbs_event { union { struct ib_uverbs_async_event_desc async; struct ib_uverbs_comp_event_desc comp; } desc; struct list_head list; struct list_head obj_list; u32 *counter; }; struct ib_uverbs_mcast_entry { struct list_head list; union ib_gid gid; u16 lid; }; struct ib_uevent_object { struct ib_uobject uobject; struct list_head event_list; u32 events_reported; }; struct ib_uxrcd_object { struct ib_uobject uobject; atomic_t refcnt; }; struct ib_usrq_object { struct ib_uevent_object uevent; struct ib_uxrcd_object *uxrcd; }; struct ib_uqp_object { struct ib_uevent_object uevent; struct list_head mcast_list; struct ib_uxrcd_object *uxrcd; }; struct ib_uwq_object { struct ib_uevent_object uevent; }; struct ib_ucq_object { struct ib_uobject uobject; struct ib_uverbs_file *uverbs_file; struct list_head comp_list; struct list_head async_list; u32 comp_events_reported; u32 async_events_reported; }; extern spinlock_t ib_uverbs_idr_lock; extern struct idr ib_uverbs_pd_idr; extern struct idr ib_uverbs_mr_idr; extern struct idr ib_uverbs_mw_idr; extern struct idr ib_uverbs_ah_idr; extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; extern struct idr ib_uverbs_xrcd_idr; extern struct idr ib_uverbs_rule_idr; extern struct idr ib_uverbs_wq_idr; extern struct idr ib_uverbs_rwq_ind_tbl_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, struct ib_device *ib_dev, int is_async); void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file); struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); void ib_uverbs_release_ucq(struct ib_uverbs_file *file, struct ib_uverbs_event_file *ev_file, struct ib_ucq_object *uobj); void ib_uverbs_release_uevent(struct ib_uverbs_file *file, struct ib_uevent_object *uobj); void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); int uverbs_dealloc_mw(struct ib_mw *mw); struct ib_uverbs_flow_spec { union { union { struct ib_uverbs_flow_spec_hdr hdr; struct { __u32 type; __u16 size; __u16 reserved; }; }; struct ib_uverbs_flow_spec_eth eth; struct ib_uverbs_flow_spec_ipv4 ipv4; struct ib_uverbs_flow_spec_tcp_udp tcp_udp; struct ib_uverbs_flow_spec_ipv6 ipv6; }; }; #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ struct ib_device *ib_dev, \ const char __user *buf, int in_len, \ int out_len) IB_UVERBS_DECLARE_CMD(get_context); IB_UVERBS_DECLARE_CMD(query_device); IB_UVERBS_DECLARE_CMD(query_port); IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(rereg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); IB_UVERBS_DECLARE_CMD(alloc_mw); IB_UVERBS_DECLARE_CMD(dealloc_mw); IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); IB_UVERBS_DECLARE_CMD(resize_cq); IB_UVERBS_DECLARE_CMD(poll_cq); IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); IB_UVERBS_DECLARE_CMD(open_qp); IB_UVERBS_DECLARE_CMD(query_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); IB_UVERBS_DECLARE_CMD(post_send); IB_UVERBS_DECLARE_CMD(post_recv); IB_UVERBS_DECLARE_CMD(post_srq_recv); IB_UVERBS_DECLARE_CMD(create_ah); IB_UVERBS_DECLARE_CMD(destroy_ah); IB_UVERBS_DECLARE_CMD(attach_mcast); IB_UVERBS_DECLARE_CMD(detach_mcast); IB_UVERBS_DECLARE_CMD(create_srq); IB_UVERBS_DECLARE_CMD(modify_srq); IB_UVERBS_DECLARE_CMD(query_srq); IB_UVERBS_DECLARE_CMD(destroy_srq); IB_UVERBS_DECLARE_CMD(create_xsrq); IB_UVERBS_DECLARE_CMD(open_xrcd); IB_UVERBS_DECLARE_CMD(close_xrcd); #define IB_UVERBS_DECLARE_EX_CMD(name) \ int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \ struct ib_device *ib_dev, \ struct ib_udata *ucore, \ struct ib_udata *uhw) IB_UVERBS_DECLARE_EX_CMD(create_flow); IB_UVERBS_DECLARE_EX_CMD(destroy_flow); IB_UVERBS_DECLARE_EX_CMD(query_device); IB_UVERBS_DECLARE_EX_CMD(create_cq); IB_UVERBS_DECLARE_EX_CMD(create_qp); IB_UVERBS_DECLARE_EX_CMD(create_wq); IB_UVERBS_DECLARE_EX_CMD(modify_wq); IB_UVERBS_DECLARE_EX_CMD(destroy_wq); IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); #endif /* UVERBS_H */